From 6a569cdda92269d42ae8e2cccd8c0f07d39b5d0c Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Mon, 24 Nov 2014 10:37:30 +0000 Subject: [PATCH] [AArch64][1/5] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P * config/aarch64/aarch64-protos.h (struct tune_params): Add fuseable_ops field. * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops. (cortexa53_tunings): Likewise. (cortexa57_tunings): Likewise. (thunderx_tunings): Likewise. (aarch64_macro_fusion_p): New function. (aarch_macro_fusion_pair_p): Likewise. (TARGET_SCHED_MACRO_FUSION_P): Define. (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise. (AARCH64_FUSE_MOV_MOVK): Likewise. (AARCH64_FUSE_NOTHING): Likewise. From-SVN: r218007 --- gcc/ChangeLog | 15 ++++++ gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64.c | 74 +++++++++++++++++++++++++++-- 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8f42d090245..562ef8a0582 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2014-11-24 Kyrylo Tkachov + + * config/aarch64/aarch64-protos.h (struct tune_params): Add + fuseable_ops field. + * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops. + (cortexa53_tunings): Likewise. + (cortexa57_tunings): Likewise. + (thunderx_tunings): Likewise. + (aarch64_macro_fusion_p): New function. + (aarch_macro_fusion_pair_p): Likewise. + (TARGET_SCHED_MACRO_FUSION_P): Define. + (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise. + (AARCH64_FUSE_MOV_MOVK): Likewise. + (AARCH64_FUSE_NOTHING): Likewise. + 2014-11-24 Martin Liska PR lto/63968 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 52e97e745a0..f5db563189d 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -170,6 +170,7 @@ struct tune_params const struct cpu_vector_cost *const vec_costs; const int memmov_cost; const int issue_rate; + const unsigned int fuseable_ops; }; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 18095136648..e9be8bc95e5 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -304,6 +304,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost = NAMED_PARAM (cond_not_taken_branch_cost, 1) }; +#define AARCH64_FUSE_NOTHING (0) +#define AARCH64_FUSE_MOV_MOVK (1 << 0) + #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif @@ -314,7 +317,8 @@ static const struct tune_params generic_tunings = &generic_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) }; static const struct tune_params cortexa53_tunings = @@ -324,7 +328,8 @@ static const struct tune_params cortexa53_tunings = &cortexa53_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK) }; static const struct tune_params cortexa57_tunings = @@ -334,7 +339,8 @@ static const struct tune_params cortexa57_tunings = &cortexa57_regmove_cost, &cortexa57_vector_cost, NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 3) + NAMED_PARAM (issue_rate, 3), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK) }; static const struct tune_params thunderx_tunings = @@ -344,7 +350,8 @@ static const struct tune_params thunderx_tunings = &thunderx_regmove_cost, &generic_vector_cost, NAMED_PARAM (memmov_cost, 6), - NAMED_PARAM (issue_rate, 2) + NAMED_PARAM (issue_rate, 2), + NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING) }; /* A processor implementing AArch64. */ @@ -10370,6 +10377,59 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code) #undef TARGET_GEN_CCMP_NEXT #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next +/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports + instruction fusion of some sort. */ + +static bool +aarch64_macro_fusion_p (void) +{ + return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING; +} + + +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR + should be kept together during scheduling. */ + +static bool +aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx set_dest; + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + /* prev and curr are simple SET insns i.e. no flag setting or branching. */ + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + + if (!aarch64_macro_fusion_p ()) + return false; + + if (simple_sets_p + && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK)) + { + /* We are trying to match: + prev (mov) == (set (reg r0) (const_int imm16)) + curr (movk) == (set (zero_extract (reg r0) + (const_int 16) + (const_int 16)) + (const_int imm16_1)) */ + + set_dest = SET_DEST (curr_set); + + if (GET_CODE (set_dest) == ZERO_EXTRACT + && CONST_INT_P (SET_SRC (curr_set)) + && CONST_INT_P (SET_SRC (prev_set)) + && CONST_INT_P (XEXP (set_dest, 2)) + && INTVAL (XEXP (set_dest, 2)) == 16 + && REG_P (XEXP (set_dest, 0)) + && REG_P (SET_DEST (prev_set)) + && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) + { + return true; + } + } + + return false; +} + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -10629,6 +10689,12 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code) #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p + +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h" -- 2.30.2