From 2140297cb3091c19ccf8fd3d741dcd840aa0ff8b Mon Sep 17 00:00:00 2001 From: Chung-Ju Wu Date: Sat, 2 Jun 2018 11:14:04 +0000 Subject: [PATCH] [NDS32] Implement fp-as-gp optimization. gcc/ * config/nds32/constants.md (unspec_volatile_element): Add UNSPEC_VOLATILE_OMIT_FP_BEGIN and UNSPEC_VOLATILE_OMIT_FP_END. * config/nds32/nds32-fp-as-gp.c: New implementation of fp_as_gp optimization. * config/nds32/nds32-protos.h (nds32_naked_function_p): Declare. (make_pass_nds32_fp_as_gp): Declare. * config/nds32/nds32.c (nds32_register_passes): Add fp_as_gp as one optimization pass. (nds32_asm_function_end_prologue): Remove unused asm output. (nds32_asm_function_begin_epilogue): Remove unused asm output. (nds32_asm_file_start): Output necessary fp_as_gp information. (nds32_option_override): Adjust register usage. (nds32_expand_prologue): Consider fp_as_gp situation. (nds32_expand_prologue_v3push): Consider fp_as_gp situation. * config/nds32/nds32.md (prologue): Check fp_as_gp_p and naked_p. (epilogue): Ditto. (return): Ditto. (simple_return): Ditto. (omit_fp_begin): Output special directive for fp_as_gp. (omit_fp_end): Output special directive for fp_as_gp. * config/nds32/nds32.opt (mfp-as-gp, mno-fp-as-gp, mforce-fp-as-gp, mforbid-fp-as-gp): New options. Co-Authored-By: Shiva Chen From-SVN: r261115 --- gcc/ChangeLog | 26 ++++ gcc/config/nds32/constants.md | 2 + gcc/config/nds32/nds32-fp-as-gp.c | 246 +++++++++++++++++++++++++++++- gcc/config/nds32/nds32-protos.h | 4 +- gcc/config/nds32/nds32.c | 77 ++++------ gcc/config/nds32/nds32.md | 50 +++++- gcc/config/nds32/nds32.opt | 15 ++ 7 files changed, 366 insertions(+), 54 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3278823415c..8f1bd35b71e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2018-06-02 Chung-Ju Wu + Shiva Chen + + * config/nds32/constants.md (unspec_volatile_element): Add + UNSPEC_VOLATILE_OMIT_FP_BEGIN and UNSPEC_VOLATILE_OMIT_FP_END. 
+ * config/nds32/nds32-fp-as-gp.c: New implementation of fp_as_gp + optimization. + * config/nds32/nds32-protos.h (nds32_naked_function_p): Declare. + (make_pass_nds32_fp_as_gp): Declare. + * config/nds32/nds32.c (nds32_register_passes): Add fp_as_gp as one + optimization pass. + (nds32_asm_function_end_prologue): Remove unused asm output. + (nds32_asm_function_begin_epilogue): Remove unused asm output. + (nds32_asm_file_start): Output necessary fp_as_gp information. + (nds32_option_override): Adjust register usage. + (nds32_expand_prologue): Consider fp_as_gp situation. + (nds32_expand_prologue_v3push): Consider fp_as_gp situation. + * config/nds32/nds32.md (prologue): Check fp_as_gp_p and naked_p. + (epilogue): Ditto. + (return): Ditto. + (simple_return): Ditto. + (omit_fp_begin): Output special directive for fp_as_gp. + (omit_fp_end): Output special directive for fp_as_gp. + * config/nds32/nds32.opt (mfp-as-gp, mno-fp-as-gp, mforce-fp-as-gp, + mforbid-fp-as-gp): New options. + 2018-06-01 Mark Wielaard * dwarf2out.c (dwarf2out_finish): Remove generation of diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md index c2994ab386a..6d42f50c882 100644 --- a/gcc/config/nds32/constants.md +++ b/gcc/config/nds32/constants.md @@ -169,6 +169,8 @@ UNSPEC_VOLATILE_SET_TRIG_EDGE UNSPEC_VOLATILE_GET_TRIG_TYPE UNSPEC_VOLATILE_RELAX_GROUP + UNSPEC_VOLATILE_OMIT_FP_BEGIN + UNSPEC_VOLATILE_OMIT_FP_END UNSPEC_VOLATILE_POP25_RETURN UNSPEC_VOLATILE_UNALIGNED_FEATURE UNSPEC_VOLATILE_ENABLE_UNALIGNED diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c index 95c9586c3b6..1abad1dc24a 100644 --- a/gcc/config/nds32/nds32-fp-as-gp.c +++ b/gcc/config/nds32/nds32-fp-as-gp.c @@ -26,19 +26,255 @@ #include "system.h" #include "coretypes.h" #include "backend.h" +#include "hard-reg-set.h" +#include "tm_p.h" +#include "rtl.h" +#include "memmodel.h" +#include "emit-rtl.h" +#include "insn-config.h" +#include "regs.h" +#include "hard-reg-set.h" 
+#include "ira.h" +#include "ira-int.h" +#include "df.h" +#include "tree-core.h" +#include "tree-pass.h" +#include "nds32-protos.h" /* ------------------------------------------------------------------------ */ +/* A helper function to check if this function should contain prologue. */ +static bool +nds32_have_prologue_p (void) +{ + int i; + + for (i = 0; i < 28; i++) + if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) + return true; + + return (flag_pic + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); +} + +static int +nds32_get_symbol_count (void) +{ + int symbol_count = 0; + rtx_insn *insn; + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + /* Counting the insn number which the addressing mode is symbol. */ + if (single_set (insn) && nds32_symbol_load_store_p (insn)) + { + rtx pattern = PATTERN (insn); + rtx mem; + gcc_assert (GET_CODE (pattern) == SET); + if (GET_CODE (SET_SRC (pattern)) == REG ) + mem = SET_DEST (pattern); + else + mem = SET_SRC (pattern); + + /* We have only lwi37 and swi37 for fp-as-gp optimization, + so don't count any other than SImode. + MEM for QImode and HImode will wrap by ZERO_EXTEND + or SIGN_EXTEND */ + if (GET_CODE (mem) == MEM) + symbol_count++; + } + } + } + + return symbol_count; +} + /* Function to determine whether it is worth to do fp_as_gp optimization. - Return 0: It is NOT worth to do fp_as_gp optimization. - Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. + Return false: It is NOT worth to do fp_as_gp optimization. + Return true: It is APPROXIMATELY worth to do fp_as_gp optimization. Note that if it is worth to do fp_as_gp optimization, we MUST set FP_REGNUM ever live in this function. */ -int +static bool nds32_fp_as_gp_check_available (void) { - /* By default we return 0. 
*/ - return 0; + basic_block bb; + basic_block exit_bb; + edge_iterator ei; + edge e; + bool first_exit_blocks_p; + + /* If ANY of the following conditions holds, + we DO NOT perform fp_as_gp optimization: + 1. TARGET_FORBID_FP_AS_GP is set + regardless of the TARGET_FORCE_FP_AS_GP. + 2. User explicitly uses 'naked'/'no_prologue' attribute. + We use nds32_naked_function_p() to help such checking. + 3. Not optimizing for size. + 4. Need frame pointer. + 5. If $fp is already required to be saved, + it means $fp is already chosen by register allocator. + Thus we had better not use it for fp_as_gp optimization. + 6. This function is a vararg function. + DO NOT apply fp_as_gp optimization on this function + because it may change and break stack frame. + 7. The epilogue is empty. + This happens when the function uses exit() + or its attribute is no_return. + In that case, compiler will not expand epilogue + so that we have no chance to output .omit_fp_end directive. */ + if (TARGET_FORBID_FP_AS_GP + || nds32_naked_function_p (current_function_decl) + || !optimize_size + || frame_pointer_needed + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || (cfun->stdarg == 1) + || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) + return false; + + /* Disable fp_as_gp if there is any infinite loop since the fp may + be reused in infinite loops by register renaming. + To check for infinite loops, we make sure exit_bb post-dominates + all other basic blocks, which holds when there are no infinite loops. */ + first_exit_blocks_p = true; + exit_bb = NULL; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + { + /* With more than one exit block, do not perform fp_as_gp optimization either. */ + if (!first_exit_blocks_p) + return false; + + exit_bb = e->src; + first_exit_blocks_p = false; + } + + /* No exit_bb found? Just abort fp_as_gp! */ + if (!exit_bb) + return false; + + /* Each bb should be post-dominated by exit_bb if there is no infinite loop! 
*/ + FOR_EACH_BB_FN (bb, cfun) + { + if (!dominated_by_p (CDI_POST_DOMINATORS, + bb, + exit_bb)) + return false; + } + + /* Now we can check the possibility of using fp_as_gp optimization. */ + if (TARGET_FORCE_FP_AS_GP) + { + /* User explicitly issues -mforce-fp-as-gp option. */ + return true; + } + else + { + /* In the following we are going to evaluate whether + it is worthwhile to do fp_as_gp optimization. */ + bool good_gain = false; + int symbol_count; + + int threshold; + + /* We check whether a prologue is already required. + Note that $gp will be saved in prologue for PIC code generation. + After that, we can set threshold by the existence of prologue. + Each fp-implied instruction saves 2 bytes of code size + compared with the gp-aware instruction, so we have following heuristics. */ + if (flag_pic + || nds32_have_prologue_p ()) + { + /* Have-prologue: + Compiler already intends to generate prologue content, + so the fp_as_gp optimization will only insert + 'la $fp,_FP_BASE_' instruction, which will be + converted into 4-byte instruction at link time. + The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ + threshold = 3; + } + else + { + /* No-prologue: + Compiler originally does not generate prologue content, + so the fp_as_gp optimization will NOT ONLY insert + 'la $fp,_FP_BASE_' instruction, but also cause + push/pop instructions. + If we are using v3push (push25/pop25), + the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; + If we are using normal push (smw/lmw), + the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ + threshold = 5 + (TARGET_V3PUSH ? 0 : 2); + } + + symbol_count = nds32_get_symbol_count (); + + if (symbol_count >= threshold) + good_gain = true; + + /* Enable fp_as_gp optimization when potential gain is good enough. 
*/ + return good_gain; + } +} + +static unsigned int +nds32_fp_as_gp (void) +{ + bool fp_as_gp_p; + calculate_dominance_info (CDI_POST_DOMINATORS); + fp_as_gp_p = nds32_fp_as_gp_check_available (); + + /* Here is a hack to IRA for enable/disable a hard register per function. + We *MUST* review this way after migrate gcc 4.9! */ + if (fp_as_gp_p) { + SET_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); + df_set_regs_ever_live (FP_REGNUM, 1); + } else { + CLEAR_HARD_REG_BIT(this_target_ira_int->x_no_unit_alloc_regs, FP_REGNUM); + } + + cfun->machine->fp_as_gp_p = fp_as_gp_p; + + free_dominance_info (CDI_POST_DOMINATORS); + return 1; +} + +const pass_data pass_data_nds32_fp_as_gp = +{ + RTL_PASS, /* type */ + "fp_as_gp", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0 /* todo_flags_finish */ +}; + +class pass_nds32_fp_as_gp : public rtl_opt_pass +{ +public: + pass_nds32_fp_as_gp (gcc::context *ctxt) + : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) + { + return TARGET_16_BIT + && optimize_size; + } + unsigned int execute (function *) { return nds32_fp_as_gp (); } +}; + +rtl_opt_pass * +make_pass_nds32_fp_as_gp (gcc::context *ctxt) +{ + return new pass_nds32_fp_as_gp (ctxt); } /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 8a93002e09d..3fa8ae175f2 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -155,9 +155,8 @@ extern int nds32_adjust_insn_length (rtx_insn *, int); /* Auxiliary functions for FP_AS_GP detection. 
*/ -extern int nds32_fp_as_gp_check_available (void); - extern bool nds32_symbol_load_store_p (rtx_insn *); +extern bool nds32_naked_function_p (tree); /* Auxiliary functions for jump table generation. */ @@ -367,5 +366,6 @@ extern bool nds32_use_load_post_increment(machine_mode); /* Functions for create nds32 specific optimization pass. */ extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *); +extern rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *); /* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index 9fcd24f7e34..1afd8a10156 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -1391,7 +1391,7 @@ nds32_needs_double_word_align (machine_mode mode, const_tree type) } /* Return true if FUNC is a naked function. */ -static bool +bool nds32_naked_function_p (tree func) { /* FOR BACKWARD COMPATIBILITY, @@ -1626,6 +1626,11 @@ nds32_register_pass ( static void nds32_register_passes (void) { + nds32_register_pass ( + make_pass_nds32_fp_as_gp, + PASS_POS_INSERT_BEFORE, + "ira"); + nds32_register_pass ( make_pass_nds32_relax_opt, PASS_POS_INSERT_AFTER, @@ -2191,56 +2196,12 @@ static void nds32_asm_function_end_prologue (FILE *file) { fprintf (file, "\t! END PROLOGUE\n"); - - /* If frame pointer is NOT needed and -mfp-as-gp is issued, - we can generate special directive: ".omit_fp_begin" - to guide linker doing fp-as-gp optimization. - However, for a naked function, which means - it should not have prologue/epilogue, - using fp-as-gp still requires saving $fp by push/pop behavior and - there is no benefit to use fp-as-gp on such small function. - So we need to make sure this function is NOT naked as well. */ - if (!frame_pointer_needed - && !cfun->machine->naked_p - && cfun->machine->fp_as_gp_p) - { - fprintf (file, "\t! ----------------------------------------\n"); - fprintf (file, "\t! 
Guide linker to do " - "link time optimization: fp-as-gp\n"); - fprintf (file, "\t! We add one more instruction to " - "initialize $fp near to $gp location.\n"); - fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n"); - fprintf (file, "\t! this extra instruction should be " - "eliminated at link stage.\n"); - fprintf (file, "\t.omit_fp_begin\n"); - fprintf (file, "\tla\t$fp,_FP_BASE_\n"); - fprintf (file, "\t! ----------------------------------------\n"); - } } /* Before rtl epilogue has been expanded, this function is used. */ static void nds32_asm_function_begin_epilogue (FILE *file) { - /* If frame pointer is NOT needed and -mfp-as-gp is issued, - we can generate special directive: ".omit_fp_end" - to claim fp-as-gp optimization range. - However, for a naked function, - which means it should not have prologue/epilogue, - using fp-as-gp still requires saving $fp by push/pop behavior and - there is no benefit to use fp-as-gp on such small function. - So we need to make sure this function is NOT naked as well. */ - if (!frame_pointer_needed - && !cfun->machine->naked_p - && cfun->machine->fp_as_gp_p) - { - fprintf (file, "\t! ----------------------------------------\n"); - fprintf (file, "\t! Claim the range of fp-as-gp " - "link time optimization\n"); - fprintf (file, "\t.omit_fp_end\n"); - fprintf (file, "\t! ----------------------------------------\n"); - } - fprintf (file, "\t! BEGIN EPILOGUE\n"); } @@ -3168,6 +3129,18 @@ nds32_asm_file_start (void) "for checking inconsistency on interrupt handler\n"); fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); + /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os, + the compiler may produce 'la $fp,_FP_BASE_' instruction + at prologue for fp-as-gp optimization. + We should emit weak reference of _FP_BASE_ to avoid undefined reference + in case user does not pass '--relax' option to linker. 
*/ + if (TARGET_FORCE_FP_AS_GP || optimize_size) + { + fprintf (asm_out_file, "\t! This weak reference is required to do " + "fp-as-gp link time optimization\n"); + fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n"); + } + fprintf (asm_out_file, "\t! ------------------------------------\n"); if (TARGET_ISA_V2) @@ -4126,6 +4099,12 @@ nds32_option_override (void) fixed_regs[r] = call_used_regs[r] = 1; } + /* See if user explicitly would like to use fp-as-gp optimization. + If so, we must prevent $fp from being allocated + during register allocation. */ + if (TARGET_FORCE_FP_AS_GP) + fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1; + if (!TARGET_16_BIT) { /* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH. */ @@ -4544,6 +4523,10 @@ nds32_expand_prologue (void) The result will be in cfun->machine. */ nds32_compute_stack_frame (); + /* Check frame_pointer_needed again in case fp became needed after reload. */ + if (frame_pointer_needed) + cfun->machine->fp_as_gp_p = false; + /* If this is a variadic function, first we need to push argument registers that hold the unnamed argument value. */ if (cfun->machine->va_args_size != 0) @@ -4951,6 +4934,10 @@ nds32_expand_prologue_v3push (void) if (cfun->machine->callee_saved_gpr_regs_size > 0) df_set_regs_ever_live (FP_REGNUM, 1); + /* Check frame_pointer_needed again in case fp became needed after reload. */ + if (frame_pointer_needed) + cfun->machine->fp_as_gp_p = false; + /* If the function is 'naked', we do not have to generate prologue code fragment. */ if (cfun->machine->naked_p && !flag_pic) diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md index 66ef03327f4..cf1ad9bd1b0 100644 --- a/gcc/config/nds32/nds32.md +++ b/gcc/config/nds32/nds32.md @@ -1830,12 +1830,33 @@ nds32_expand_prologue_v3push (); else nds32_expand_prologue (); + + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. 
+ However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM))); + DONE; }) (define_expand "epilogue" [(const_int 0)] "" { + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM))); + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. In addition, we need to check if v3push is indeed available. */ if (NDS32_V3PUSH_AVAILABLE_P) @@ -1935,7 +1956,8 @@ "nds32_can_use_return_insn ()" { /* Emit as the simple return. */ - if (cfun->machine->naked_p + if (!cfun->machine->fp_as_gp_p + && cfun->machine->naked_p && (cfun->machine->va_args_size == 0)) { emit_jump_insn (gen_return_internal ()); @@ -1945,9 +1967,14 @@ ;; This pattern is expanded only by the shrink-wrapping optimization ;; on paths where the function prologue has not been executed. +;; However, such optimization may reorder the prologue/epilogue blocks +;; together with basic blocks within function body. +;; So we must disable this pattern if we have already decided +;; to perform fp_as_gp optimization, which requires prologue to be +;; first block and epilogue to be last block. 
(define_expand "simple_return" [(simple_return)] - "" + "!cfun->machine->fp_as_gp_p" "" ) @@ -2162,6 +2189,25 @@ [(set_attr "length" "0")] ) +;; Output .omit_fp_begin for fp-as-gp optimization. +;; Also we have to set $fp register. +(define_insn "omit_fp_begin" + [(set (match_operand:SI 0 "register_operand" "=x") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))] + "" + "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----" + [(set_attr "length" "8")] +) + +;; Output .omit_fp_end for fp-as-gp optimization. +;; Claim that we have to use $fp register. +(define_insn "omit_fp_end" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)] + "" + "! -----\;.omit_fp_end\;! -----" + [(set_attr "length" "0")] +) + (define_insn "pop25return" [(return) (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)] diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt index d32e2ec1165..6f73f897e5d 100644 --- a/gcc/config/nds32/nds32.opt +++ b/gcc/config/nds32/nds32.opt @@ -32,6 +32,13 @@ EL Target RejectNegative Alias(mlittle-endian) Generate code in little-endian mode. +mfp-as-gp +Target RejectNegative Alias(mforce-fp-as-gp) +Force performing fp-as-gp optimization. + +mno-fp-as-gp +Target RejectNegative Alias(mforbid-fp-as-gp) +Forbid performing fp-as-gp optimization. ; --------------------------------------------------------------- @@ -85,6 +92,14 @@ mlittle-endian Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) Generate code in little-endian mode. +mforce-fp-as-gp +Target Undocumented Mask(FORCE_FP_AS_GP) +Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. + +mforbid-fp-as-gp +Target Undocumented Mask(FORBID_FP_AS_GP) +Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. 
+ mict-model= Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL) Specify the address generation strategy for ICT call's code model. -- 2.30.2