+2018-06-02 Chung-Ju Wu <jasonwucj@gmail.com>
+ Shiva Chen <shiva0217@gmail.com>
+
+ * config/nds32/constants.md (unspec_volatile_element): Add
+ UNSPEC_VOLATILE_OMIT_FP_BEGIN and UNSPEC_VOLATILE_OMIT_FP_END.
+ * config/nds32/nds32-fp-as-gp.c: New implementation of fp_as_gp
+ optimization.
+ * config/nds32/nds32-protos.h (nds32_naked_function_p): Declare.
+ (make_pass_nds32_fp_as_gp): Declare.
+ * config/nds32/nds32.c (nds32_register_passes): Add fp_as_gp as one
+ optimization pass.
+ (nds32_asm_function_end_prologue): Remove unused asm output.
+ (nds32_asm_function_begin_epilogue): Remove unused asm output.
+ (nds32_asm_file_start): Output necessary fp_as_gp information.
+ (nds32_option_override): Adjust register usage.
+ (nds32_expand_prologue): Consider fp_as_gp situation.
+ (nds32_expand_prologue_v3push): Consider fp_as_gp situation.
+ * config/nds32/nds32.md (prologue): Check fp_as_gp_p and naked_p.
+ (epilogue): Ditto.
+ (return): Ditto.
+ (simple_return): Ditto.
+ (omit_fp_begin): Output special directive for fp_as_gp.
+ (omit_fp_end): Output special directive for fp_as_gp.
+ * config/nds32/nds32.opt (mfp-as-gp, mno-fp-as-gp, mforce-fp-as-gp,
+ mforbid-fp-as-gp): New options.
+
2018-06-01 Mark Wielaard <mark@klomp.org>
* dwarf2out.c (dwarf2out_finish): Remove generation of
UNSPEC_VOLATILE_SET_TRIG_EDGE
UNSPEC_VOLATILE_GET_TRIG_TYPE
UNSPEC_VOLATILE_RELAX_GROUP
+ UNSPEC_VOLATILE_OMIT_FP_BEGIN
+ UNSPEC_VOLATILE_OMIT_FP_END
UNSPEC_VOLATILE_POP25_RETURN
UNSPEC_VOLATILE_UNALIGNED_FEATURE
UNSPEC_VOLATILE_ENABLE_UNALIGNED
#include "system.h"
#include "coretypes.h"
#include "backend.h"
+#include "hard-reg-set.h"
+#include "tm_p.h"
+#include "rtl.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "insn-config.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "ira.h"
+#include "ira-int.h"
+#include "df.h"
+#include "tree-core.h"
+#include "tree-pass.h"
+#include "nds32-protos.h"
/* ------------------------------------------------------------------------ */
+/* Helper predicate: tell whether the current function is expected to
+   contain a prologue.  The answer is true when any register numbered
+   0..27, FP_REGNUM or LP_REGNUM satisfies NDS32_REQUIRED_CALLEE_SAVED_P,
+   or when flag_pic is set.  */
+static bool
+nds32_have_prologue_p (void)
+{
+  int regno = 0;
+
+  /* One register in the range [0, 27] that has to be saved
+     is already enough to answer the question.  */
+  while (regno < 28)
+    {
+      if (NDS32_REQUIRED_CALLEE_SAVED_P (regno))
+        return true;
+      regno++;
+    }
+
+  if (flag_pic)
+    return true;
+
+  if (NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM))
+    return true;
+
+  return NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM);
+}
+
+/* Walk every insn of the current function and count those that are a
+   single set accepted by nds32_symbol_load_store_p and whose memory-side
+   operand is a plain MEM.  Return that count.  */
+static int
+nds32_get_symbol_count (void)
+{
+  basic_block bb;
+  rtx_insn *insn;
+  int n_symbol_accesses = 0;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    FOR_BB_INSNS (bb, insn)
+      {
+        rtx body;
+        rtx operand;
+
+        /* Only insns whose addressing mode is a symbol are of interest.  */
+        if (!single_set (insn) || !nds32_symbol_load_store_p (insn))
+          continue;
+
+        body = PATTERN (insn);
+        gcc_assert (GET_CODE (body) == SET);
+
+        /* With a register as source the other operand is the destination;
+           in every other case inspect the source.  */
+        operand = REG_P (SET_SRC (body)) ? SET_DEST (body) : SET_SRC (body);
+
+        /* Only lwi37 and swi37 are available for the fp-as-gp
+           optimization, so nothing other than SImode is to be counted.
+           A MEM for QImode or HImode is wrapped by ZERO_EXTEND or
+           SIGN_EXTEND and therefore is not matched here.  */
+        if (MEM_P (operand))
+          n_symbol_accesses++;
+      }
+
+  return n_symbol_accesses;
+}
+
/* Function to determine whether it is worth to do fp_as_gp optimization.
- Return 0: It is NOT worth to do fp_as_gp optimization.
- Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization.
+ Return false: It is NOT worth to do fp_as_gp optimization.
+ Return true: It is APPROXIMATELY worth to do fp_as_gp optimization.
Note that if it is worth to do fp_as_gp optimization,
we MUST set FP_REGNUM ever live in this function. */
-int
+static bool
nds32_fp_as_gp_check_available (void)
{
- /* By default we return 0. */
- return 0;
+ basic_block bb;
+ basic_block exit_bb;
+ edge_iterator ei;
+ edge e;
+ bool first_exit_blocks_p;
+
+ /* If ANY of the following conditions holds,
+ we DO NOT perform fp_as_gp optimization:
+ 1. TARGET_FORBID_FP_AS_GP is set
+ regardless of the TARGET_FORCE_FP_AS_GP.
+ 2. User explicitly uses 'naked'/'no_prologue' attribute.
+ We use nds32_naked_function_p() to help such checking.
+ 3. Not optimizing for size.
+ 4. Need frame pointer.
+ 5. If $fp is already required to be saved,
+ it means $fp is already chosen by the register allocator.
+ Thus we had better not use it for fp_as_gp optimization.
+ 6. This function is a vararg function.
+ DO NOT apply fp_as_gp optimization on this function
+ because it may change and break stack frame.
+ 7. The epilogue is empty.
+ This happens when the function uses exit()
+ or its attribute is no_return.
+ In that case, compiler will not expand epilogue
+ so that we have no chance to output .omit_fp_end directive. */
+ if (TARGET_FORBID_FP_AS_GP
+ || nds32_naked_function_p (current_function_decl)
+ || !optimize_size
+ || frame_pointer_needed
+ || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
+ || (cfun->stdarg == 1)
+ || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL))
+ return false;
+
+ /* Disable fp_as_gp if there is any infinite loop, since fp may be
+ reused inside infinite loops by register renaming.
+ To detect infinite loops we verify that exit_bb post-dominates
+ all other basic blocks, which holds when there is no infinite loop. */
+ first_exit_blocks_p = true;
+ exit_bb = NULL;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
+ {
+ /* Do not perform fp_as_gp optimization either when there is
+ more than one exit block. */
+ if (!first_exit_blocks_p)
+ return false;
+
+ exit_bb = e->src;
+ first_exit_blocks_p = false;
+ }
+
+ /* No exit_bb found?  Then give up on fp_as_gp. */
+ if (!exit_bb)
+ return false;
+
+ /* Each bb should be post-dominated by exit_bb if there is no infinite loop.
+ The caller nds32_fp_as_gp computes CDI_POST_DOMINATORS before
+ calling this function. */
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (!dominated_by_p (CDI_POST_DOMINATORS,
+ bb,
+ exit_bb))
+ return false;
+ }
+
+ /* Now we can check the possibility of using fp_as_gp optimization. */
+ if (TARGET_FORCE_FP_AS_GP)
+ {
+ /* User explicitly issues -mforce-fp-as-gp option. */
+ return true;
+ }
+ else
+ {
+ /* In the following we are going to evaluate whether
+ it is worth to do fp_as_gp optimization. */
+ bool good_gain = false;
+ int symbol_count;
+
+ int threshold;
+
+ /* Check whether a prologue is already required.
+ Note that $gp will be saved in prologue for PIC code generation.
+ After that, we can set threshold by the existence of prologue.
+ Each fp-implied instruction will gain 2-byte code size
+ from gp-aware instruction, so we have following heuristics.
+ NOTE(review): nds32_have_prologue_p () already tests flag_pic itself,
+ so the explicit flag_pic test below looks redundant. */
+ if (flag_pic
+ || nds32_have_prologue_p ())
+ {
+ /* Have-prologue:
+ Compiler already intends to generate prologue content,
+ so the fp_as_gp optimization will only insert
+ 'la $fp,_FP_BASE_' instruction, which will be
+ converted into 4-byte instruction at link time.
+ The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */
+ threshold = 3;
+ }
+ else
+ {
+ /* No-prologue:
+ Compiler originally does not generate prologue content,
+ so the fp_as_gp optimization will NOT ONLY insert
+ 'la $fp,_FP_BASE_' instruction, but also causes
+ push/pop instructions.
+ If we are using v3push (push25/pop25),
+ the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2;
+ If we are using normal push (smw/lmw),
+ the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */
+ threshold = 5 + (TARGET_V3PUSH ? 0 : 2);
+ }
+
+ symbol_count = nds32_get_symbol_count ();
+
+ if (symbol_count >= threshold)
+ good_gain = true;
+
+ /* Enable fp_as_gp optimization when potential gain is good enough. */
+ return good_gain;
+ }
+}
+
+/* Body of the fp_as_gp pass.  Decide whether the current function uses
+   the fp-as-gp optimization, adjust IRA's x_no_unit_alloc_regs entry for
+   FP_REGNUM accordingly and record the decision in
+   cfun->machine->fp_as_gp_p.  Always returns 1.  */
+static unsigned int
+nds32_fp_as_gp (void)
+{
+  bool use_fp_as_gp;
+
+  /* nds32_fp_as_gp_check_available queries CDI_POST_DOMINATORS,
+     so the information has to exist before the call.  */
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  use_fp_as_gp = nds32_fp_as_gp_check_available ();
+
+  /* This is a hack on IRA to enable or disable a hard register on a
+     per-function basis.  This approach *MUST* be reviewed after
+     migrating to gcc 4.9!  */
+  if (!use_fp_as_gp)
+    {
+      CLEAR_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs,
+                          FP_REGNUM);
+    }
+  else
+    {
+      SET_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs,
+                        FP_REGNUM);
+      df_set_regs_ever_live (FP_REGNUM, 1);
+    }
+
+  cfun->machine->fp_as_gp_p = use_fp_as_gp;
+
+  free_dominance_info (CDI_POST_DOMINATORS);
+  return 1;
+}
+
+/* Static descriptor of the fp_as_gp pass: an RTL pass named "fp_as_gp",
+   accounted under TV_MACH_DEP, with no required, provided or destroyed
+   properties and no TODO flags at start or finish.  */
+const pass_data pass_data_nds32_fp_as_gp =
+{
+ RTL_PASS, /* type */
+ "fp_as_gp", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0 /* todo_flags_finish */
+};
+
+/* RTL pass object that wraps nds32_fp_as_gp and is described by
+   pass_data_nds32_fp_as_gp.  */
+class pass_nds32_fp_as_gp : public rtl_opt_pass
+{
+public:
+  pass_nds32_fp_as_gp (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+
+  /* The pass runs only when TARGET_16_BIT holds and optimize_size
+     is set.  */
+  bool gate (function *)
+  {
+    if (!TARGET_16_BIT)
+      return false;
+
+    return optimize_size != 0;
+  }
+
+  /* Hand over to the pass body and forward its return value.  */
+  unsigned int execute (function *)
+  {
+    return nds32_fp_as_gp ();
+  }
+};
+
+/* Create the fp_as_gp pass for context CTXT.  The pass object is allocated
+   with new and handed back to the caller as an rtl_opt_pass pointer.  */
+rtl_opt_pass *
+make_pass_nds32_fp_as_gp (gcc::context *ctxt)
+{
+ return new pass_nds32_fp_as_gp (ctxt);
}
/* ------------------------------------------------------------------------ */
/* Auxiliary functions for FP_AS_GP detection. */
-extern int nds32_fp_as_gp_check_available (void);
-
extern bool nds32_symbol_load_store_p (rtx_insn *);
+extern bool nds32_naked_function_p (tree);
/* Auxiliary functions for jump table generation. */
/* Functions for create nds32 specific optimization pass. */
extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *);
+extern rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *);
/* ------------------------------------------------------------------------ */
}
/* Return true if FUNC is a naked function. */
-static bool
+bool
nds32_naked_function_p (tree func)
{
/* FOR BACKWARD COMPATIBILITY,
static void
nds32_register_passes (void)
{
+ nds32_register_pass (
+ make_pass_nds32_fp_as_gp,
+ PASS_POS_INSERT_BEFORE,
+ "ira");
+
nds32_register_pass (
make_pass_nds32_relax_opt,
PASS_POS_INSERT_AFTER,
nds32_asm_function_end_prologue (FILE *file)
{
fprintf (file, "\t! END PROLOGUE\n");
-
- /* If frame pointer is NOT needed and -mfp-as-gp is issued,
- we can generate special directive: ".omit_fp_begin"
- to guide linker doing fp-as-gp optimization.
- However, for a naked function, which means
- it should not have prologue/epilogue,
- using fp-as-gp still requires saving $fp by push/pop behavior and
- there is no benefit to use fp-as-gp on such small function.
- So we need to make sure this function is NOT naked as well. */
- if (!frame_pointer_needed
- && !cfun->machine->naked_p
- && cfun->machine->fp_as_gp_p)
- {
- fprintf (file, "\t! ----------------------------------------\n");
- fprintf (file, "\t! Guide linker to do "
- "link time optimization: fp-as-gp\n");
- fprintf (file, "\t! We add one more instruction to "
- "initialize $fp near to $gp location.\n");
- fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n");
- fprintf (file, "\t! this extra instruction should be "
- "eliminated at link stage.\n");
- fprintf (file, "\t.omit_fp_begin\n");
- fprintf (file, "\tla\t$fp,_FP_BASE_\n");
- fprintf (file, "\t! ----------------------------------------\n");
- }
}
/* Before rtl epilogue has been expanded, this function is used. */
static void
nds32_asm_function_begin_epilogue (FILE *file)
{
- /* If frame pointer is NOT needed and -mfp-as-gp is issued,
- we can generate special directive: ".omit_fp_end"
- to claim fp-as-gp optimization range.
- However, for a naked function,
- which means it should not have prologue/epilogue,
- using fp-as-gp still requires saving $fp by push/pop behavior and
- there is no benefit to use fp-as-gp on such small function.
- So we need to make sure this function is NOT naked as well. */
- if (!frame_pointer_needed
- && !cfun->machine->naked_p
- && cfun->machine->fp_as_gp_p)
- {
- fprintf (file, "\t! ----------------------------------------\n");
- fprintf (file, "\t! Claim the range of fp-as-gp "
- "link time optimization\n");
- fprintf (file, "\t.omit_fp_end\n");
- fprintf (file, "\t! ----------------------------------------\n");
- }
-
fprintf (file, "\t! BEGIN EPILOGUE\n");
}
"for checking inconsistency on interrupt handler\n");
fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size);
+ /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os,
+ the compiler may produce 'la $fp,_FP_BASE_' instruction
+ at prologue for fp-as-gp optimization.
+ We should emit weak reference of _FP_BASE_ to avoid undefined reference
+ in case user does not pass '--relax' option to linker. */
+ if (TARGET_FORCE_FP_AS_GP || optimize_size)
+ {
+ fprintf (asm_out_file, "\t! This weak reference is required to do "
+ "fp-as-gp link time optimization\n");
+ fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n");
+ }
+
fprintf (asm_out_file, "\t! ------------------------------------\n");
if (TARGET_ISA_V2)
fixed_regs[r] = call_used_regs[r] = 1;
}
+ /* See if user explicitly would like to use fp-as-gp optimization.
+ If so, we must prevent $fp from being allocated
+ during register allocation. */
+ if (TARGET_FORCE_FP_AS_GP)
+ fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1;
+
if (!TARGET_16_BIT)
{
/* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH. */
The result will be in cfun->machine. */
nds32_compute_stack_frame ();
+ /* Check frame_pointer_needed again in case fp turns out to be needed after reload. */
+ if (frame_pointer_needed)
+ cfun->machine->fp_as_gp_p = false;
+
/* If this is a variadic function, first we need to push argument
registers that hold the unnamed argument value. */
if (cfun->machine->va_args_size != 0)
if (cfun->machine->callee_saved_gpr_regs_size > 0)
df_set_regs_ever_live (FP_REGNUM, 1);
+ /* Check frame_pointer_needed again in case fp turns out to be needed after reload. */
+ if (frame_pointer_needed)
+ cfun->machine->fp_as_gp_p = false;
+
/* If the function is 'naked',
we do not have to generate prologue code fragment. */
if (cfun->machine->naked_p && !flag_pic)
nds32_expand_prologue_v3push ();
else
nds32_expand_prologue ();
+
+ /* If cfun->machine->fp_as_gp_p is true, we can generate special
+ directive to guide linker doing fp-as-gp optimization.
+ However, for a naked function, which means
+ it should not have prologue/epilogue,
+ using fp-as-gp still requires saving $fp by push/pop behavior and
+ there is no benefit to use fp-as-gp on such small function.
+ So we need to make sure this function is NOT naked as well. */
+ if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
+ emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM)));
+
DONE;
})
(define_expand "epilogue" [(const_int 0)]
""
{
+ /* If cfun->machine->fp_as_gp_p is true, we can generate special
+ directive to guide linker doing fp-as-gp optimization.
+ However, for a naked function, which means
+ it should not have prologue/epilogue,
+ using fp-as-gp still requires saving $fp by push/pop behavior and
+ there is no benefit to use fp-as-gp on such small function.
+ So we need to make sure this function is NOT naked as well. */
+ if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
+ emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM)));
+
/* Note that only under V3/V3M ISA, we could use v3pop epilogue.
In addition, we need to check if v3push is indeed available. */
if (NDS32_V3PUSH_AVAILABLE_P)
"nds32_can_use_return_insn ()"
{
/* Emit as the simple return. */
- if (cfun->machine->naked_p
+ if (!cfun->machine->fp_as_gp_p
+ && cfun->machine->naked_p
&& (cfun->machine->va_args_size == 0))
{
emit_jump_insn (gen_return_internal ());
;; This pattern is expanded only by the shrink-wrapping optimization
;; on paths where the function prologue has not been executed.
+;; However, that optimization may reorder the prologue/epilogue blocks
+;; together with the basic blocks within the function body.
+;; So we must disable this pattern if we have already decided
+;; to perform fp_as_gp optimization, which requires the prologue to be
+;; the first block and the epilogue to be the last block.
(define_expand "simple_return"
[(simple_return)]
- ""
+ "!cfun->machine->fp_as_gp_p"
""
)
[(set_attr "length" "0")]
)
+;; Output .omit_fp_begin for fp-as-gp optimization.
+;; Also we have to set $fp register.
+;; The output template names $fp literally instead of printing operand 0;
+;; the prologue expander passes a REG for FP_REGNUM as operand 0.
+;; The unspec_volatile source carries no real input, only (const_int 0).
+(define_insn "omit_fp_begin"
+ [(set (match_operand:SI 0 "register_operand" "=x")
+ (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))]
+ ""
+ "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----"
+ [(set_attr "length" "8")]
+)
+
+;; Output .omit_fp_end for fp-as-gp optimization.
+;; Claim that we have to use $fp register.
+;; The template consists of the .omit_fp_end directive between two '!' lines
+;; and the insn is given length 0; operand 0 appears only as an input of
+;; the unspec_volatile (the epilogue expander passes a REG for FP_REGNUM).
+(define_insn "omit_fp_end"
+ [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)]
+ ""
+ "! -----\;.omit_fp_end\;! -----"
+ [(set_attr "length" "0")]
+)
+
(define_insn "pop25return"
[(return)
(unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)]
Target RejectNegative Alias(mlittle-endian)
Generate code in little-endian mode.
+mfp-as-gp
+Target RejectNegative Alias(mforce-fp-as-gp)
+Force performing fp-as-gp optimization.
+
+mno-fp-as-gp
+Target RejectNegative Alias(mforbid-fp-as-gp)
+Forbid performing fp-as-gp optimization.
; ---------------------------------------------------------------
Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN)
Generate code in little-endian mode.
+mforce-fp-as-gp
+Target Undocumented Mask(FORCE_FP_AS_GP)
+Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization.
+
+mforbid-fp-as-gp
+Target Undocumented Mask(FORBID_FP_AS_GP)
+Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'.
+
mict-model=
Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL)
Specify the address generation strategy for ICT call's code model.