From 385e3f0c2b92341efb1654e14968a9996f4e9720 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 6 Jul 2015 08:17:44 -0700 Subject: [PATCH] Add -march=iamcu to optimize for IA MCU IA MCU is based on Intel Pentium ISA without x87 and passing parameters in registers. We want to optimize for IA MCU without changing existing Pentium codegen. This patch adds PROCESSOR_IAMCU for -march=iamcu, which is based on -march=pentium with updated cost tables. gcc/ PR target/66749 * config/i386/i386.c (iamcu_cost): New. (m_IAMCU): Likewise. (initial_ix86_arch_features): Disable X86_ARCH_CMOV for m_IAMCU. (processor_target_table): Add an entry for "iamcu". (processor_alias_table): Likewise. (ix86_issue_rate): Handle PROCESSOR_IAMCU. (ix86_adjust_cost): Likewise. (ia32_multipass_dfa_lookahead): Likewise. * config/i386/i386.h (processor_type): Add PROCESSOR_IAMCU. * config/i386/x86-tune.def: Updated for m_IAMCU. gcc/testsuite/ PR target/66749 * gcc.target/i386/pr66749.c: New test. From-SVN: r225460 --- gcc/config/i386/i386.c | 76 ++++++++++++++++++++++++- gcc/config/i386/i386.h | 1 + gcc/config/i386/x86-tune.def | 36 +++++++----- gcc/testsuite/gcc.target/i386/pr66749.c | 14 +++++ 4 files changed, 111 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr66749.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0a5dfd3554d..9fb8db613a6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -426,6 +426,74 @@ struct processor_costs pentium_cost = { 1, /* cond_not_taken_branch_cost. */ }; +static const +struct processor_costs iamcu_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (4), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ + COSTS_N_INSNS (11), /* HI */ + COSTS_N_INSNS (11), /* SI */ + COSTS_N_INSNS (11), /* DI */ + COSTS_N_INSNS (11)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (25), /* HI */ + COSTS_N_INSNS (25), /* SI */ + COSTS_N_INSNS (25), /* DI */ + COSTS_N_INSNS (25)}, /* other */ + COSTS_N_INSNS (3), /* cost of movsx */ + COSTS_N_INSNS (2), /* cost of movzx */ + 8, /* "large" insn */ + 6, /* MOVE_RATIO */ + 6, /* cost for loading QImode using movzbl */ + {2, 4, 2}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). */ + {2, 4, 2}, /* cost of storing integer registers */ + 2, /* cost of reg,reg fld/fst */ + {2, 2, 6}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {4, 4, 6}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 8, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {4, 8, 16}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {4, 8, 16}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 3, /* MMX or SSE register to integer */ + 8, /* size of l1 cache. */ + 8, /* size of l2 cache */ + 0, /* size of prefetch block */ + 0, /* number of parallel prefetches */ + 2, /* Branch cost */ + COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (3), /* cost of FMUL instruction. */ + COSTS_N_INSNS (39), /* cost of FDIV instruction. */ + COSTS_N_INSNS (1), /* cost of FABS instruction. */ + COSTS_N_INSNS (1), /* cost of FCHS instruction. */ + COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ + pentium_memcpy, + pentium_memset, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure the alignment). For small blocks inline loop is still a noticeable win, for bigger blocks either rep movsl or rep movsb is @@ -2027,6 +2095,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_386 (1<