From 2f62165dce49a99b72d729aae9f2d992a85c7765 Mon Sep 17 00:00:00 2001 From: Ganesh Gopalasubramanian Date: Thu, 19 Dec 2013 11:04:43 +0000 Subject: [PATCH] Enable TARGET_LOOP_UNROLL_ADJUST for bdver3/bdver4 From-SVN: r206110 --- gcc/ChangeLog | 11 +++++++ gcc/config/i386/i386.c | 62 ++++++++++++++++++++++++++++++++++++ gcc/config/i386/i386.h | 2 ++ gcc/config/i386/x86-tune.def | 6 ++++ 4 files changed, 81 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 13bb35aedca..6f69334aa8a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2013-12-19 Ganesh Gopalasubramanian + + * config/i386/i386.c: Include cfgloop.h. + (ix86_loop_memcount): New function. + (ix86_loop_unroll_adjust): New function. + (TARGET_LOOP_UNROLL_ADJUST): Define. + * config/i386/i386.h + (TARGET_ADJUST_UNROLL): Define. + * config/i386/x86-tune.def + (X86_TUNE_ADJUST_UNROLL): Define. + 2013-12-19 Marek Polacek * config/i386/i386.c (ix86_parse_stringop_strategy_string): Remove diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 862231bf80c..f82d1a40470 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see #include "is-a.h" #include "gimple.h" #include "gimplify.h" +#include "cfgloop.h" #include "dwarf2.h" #include "df.h" #include "tm-constrs.h" @@ -44014,6 +44015,64 @@ ix86_simd_clone_usable (struct cgraph_node *node) } } +/* If *X is a MEM, add its weight to *MEM_COUNT: 2 when + GET_MODE_SIZE / UNITS_PER_WORD exceeds 4, otherwise 1. Any other rtx is + ignored. Always returns 0. The total determines the unrolling factor for bdver3 and bdver4 architectures. */ + +static int +ix86_loop_memcount (rtx *x, unsigned *mem_count) +{ + if (*x != NULL_RTX && MEM_P (*x)) + { + enum machine_mode mode; + unsigned int n_words; + + mode = GET_MODE (*x); + n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; + + if (n_words > 4) + (*mem_count)+=2; + else + (*mem_count)+=1; + } + return 0; +} + +/* This function adjusts the unroll factor based on + the hardware capabilities. 
For example, bdver3 has + a loop buffer which makes unrolling of smaller + loops less important. With 1 to 32 memory references in the + loop, the unroll factor is limited to 32 divided by that count; + otherwise NUNROLL is returned unchanged. */ + +static unsigned +ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop) +{ + basic_block *bbs; + rtx insn; + unsigned i; + unsigned mem_count = 0; + + if (!TARGET_ADJUST_UNROLL) + return nunroll; + + /* Count the memory references in the loop body; BB_END is itself an insn of its block, so walk up to and including it. */ + bbs = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + { + for (insn = BB_HEAD (bbs[i]); insn != NEXT_INSN (BB_END (bbs[i])); insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn)) + for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count); + } + free (bbs); + + if (mem_count && mem_count <= 32 && 32 / mem_count < nunroll) + return 32 / mem_count; + + return nunroll; +} + + /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */ static bool @@ -44499,6 +44558,9 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #define TARGET_INIT_LIBFUNCS darwin_rename_builtins #endif +#undef TARGET_LOOP_UNROLL_ADJUST +#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust + #undef TARGET_SPILL_CLASS #define TARGET_SPILL_CLASS ix86_spill_class diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7efd1e01f4e..b6e7d4611e6 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -443,6 +443,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] #define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \ ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] +#define TARGET_ADJUST_UNROLL \ + ix86_tune_features[X86_TUNE_ADJUST_UNROLL] /* Feature tests against the various architecture variations. 
*/ enum ix86_arch_indices { diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 4c13c3a0ec6..95396850172 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -503,3 +503,9 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0) arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme is usually used for RISC targets. */ DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0) + +/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based + on hardware capabilities. Bdver3 hardware has a loop buffer which makes + unrolling small loops less important. For such architectures, we adjust + the unroll factor so that the unrolled loop fits the loop buffer. */ +DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) -- 2.30.2