1, /* cond_not_taken_branch_cost. */
};
+static const
+struct processor_costs iamcu_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 8, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ pentium_memcpy,
+ pentium_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
(we ensure the alignment). For small blocks inline loop is still a
noticeable win, for bigger blocks either rep movsl or rep movsb is
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_IAMCU (1<<PROCESSOR_IAMCU)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
ix86_arch_features based on the processor mask. */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
/* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
- ~(m_386 | m_486 | m_PENT | m_K6),
+ ~(m_386 | m_486 | m_PENT | m_IAMCU | m_K6),
/* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
~m_386,
{"i386", &i386_cost, 4, 3, 4, 3, 4},
{"i486", &i486_cost, 16, 15, 16, 15, 16},
{"pentium", &pentium_cost, 16, 7, 16, 7, 16},
+ {"iamcu", &iamcu_cost, 16, 7, 16, 7, 16},
{"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
{"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
{"nocona", &nocona_cost, 0, 0, 0, 0, 0},
{"i486", PROCESSOR_I486, CPU_NONE, 0},
{"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
{"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"iamcu", PROCESSOR_IAMCU, CPU_PENTIUM, 0},
{"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
{"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
{"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
+ case PROCESSOR_IAMCU:
case PROCESSOR_BONNELL:
case PROCESSOR_SILVERMONT:
case PROCESSOR_KNL:
switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
+ case PROCESSOR_IAMCU:
/* Address Generation Interlock adds a cycle of latency. */
if (insn_type == TYPE_LEA)
{
switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
+ case PROCESSOR_IAMCU:
return 2;
case PROCESSOR_PENTIUMPRO:
/* X86_TUNE_SCHEDULE: Enable scheduling. */
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
- m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
- | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
+ m_PENT | m_IAMCU | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
+ | m_INTEL | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
on modern chips. Preffer stores affecting whole integer register
/* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
over esp subtraction. */
DEF_TUNE (X86_TUNE_SINGLE_PUSH, "single_push", m_386 | m_486 | m_PENT
- | m_K6_GEODE)
+ | m_IAMCU | m_K6_GEODE)
/* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
over esp subtraction. */
-DEF_TUNE (X86_TUNE_DOUBLE_PUSH, "double_push", m_PENT | m_K6_GEODE)
+DEF_TUNE (X86_TUNE_DOUBLE_PUSH, "double_push", m_PENT | m_IAMCU
+ | m_K6_GEODE)
/* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
over esp addition. */
-DEF_TUNE (X86_TUNE_SINGLE_POP, "single_pop", m_386 | m_486 | m_PENT | m_PPRO)
+DEF_TUNE (X86_TUNE_SINGLE_POP, "single_pop", m_386 | m_486 | m_PENT
+ | m_IAMCU | m_PPRO)
/* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
over esp addition. */
-DEF_TUNE (X86_TUNE_DOUBLE_POP, "double_pop", m_PENT)
+DEF_TUNE (X86_TUNE_DOUBLE_POP, "double_pop", m_PENT | m_IAMCU)
/*****************************************************************************/
/* Branch predictor tuning */
/* X86_TUNE_READ_MODIFY: Enable use of read-modify instructions such
as "add mem, reg". */
-DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_PPRO))
+DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_IAMCU | m_PPRO))
/* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */
DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */
DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
- ~(m_PENT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL | m_K6))
+ ~(m_PENT | m_IAMCU | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
+ | m_K6))
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
/* X86_TUNE_USE_SIMODE_FIOP: Enables use of x87 instructions with 32bit
integer operand. */
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
- ~(m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
- | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
+ ~(m_PENT | m_IAMCU | m_PPRO | m_CORE_ALL | m_BONNELL
+ | m_SILVERMONT | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
/* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
/* X86_TUNE_ZERO_EXTEND_WITH_AND: Use AND instruction instead
of mozbl/movwl. */
-DEF_TUNE (X86_TUNE_ZERO_EXTEND_WITH_AND, "zero_extend_with_and", m_486 | m_PENT)
+DEF_TUNE (X86_TUNE_ZERO_EXTEND_WITH_AND, "zero_extend_with_and",
+ m_486 | m_PENT | m_IAMCU)
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
and SImode multiply, but 386 and 486 do HImode multiply faster. */
/* X86_TUNE_FAST_PREFIX: Enable demoting some 32bit or 64bit arithmetic
into 16bit/8bit when resulting sequence is shorter. For example
for "and $-65536, reg" to 16bit store of 0. */
-DEF_TUNE (X86_TUNE_FAST_PREFIX, "fast_prefix", ~(m_386 | m_486 | m_PENT))
+DEF_TUNE (X86_TUNE_FAST_PREFIX, "fast_prefix",
+ ~(m_386 | m_486 | m_PENT | m_IAMCU))
/* X86_TUNE_READ_MODIFY_WRITE: Enable use of read modify write instructions
such as "add $1, mem". */
-DEF_TUNE (X86_TUNE_READ_MODIFY_WRITE, "read_modify_write", ~m_PENT)
+DEF_TUNE (X86_TUNE_READ_MODIFY_WRITE, "read_modify_write",
+ ~(m_PENT | m_IAMCU))
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
than a MOV. */
-DEF_TUNE (X86_TUNE_MOVE_M1_VIA_OR, "move_m1_via_or", m_PENT)
+DEF_TUNE (X86_TUNE_MOVE_M1_VIA_OR, "move_m1_via_or", m_PENT | m_IAMCU)
/* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
but one byte longer. */
-DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT)
+DEF_TUNE (X86_TUNE_NOT_UNPAIRABLE, "not_unpairable", m_PENT | m_IAMCU)
/* X86_TUNE_PARTIAL_REG_STALL: Pentium pro, unlike later chips, handled
use of partial registers by renaming. This improved performance of 16bit