* ddg.h, ddg.c, modulo-sched.c: New files.
* cfglayout.c (duplicate_insn_chain): Remove "static" and push
internals to "duplicate_insn".
(duplicate_insn): New function.
* cfglayout.h (duplicate_insn_chain, duplicate_insn): New
declarations.
* common.opt (fmodulo-sched): New flag.
* df.c (df_bb_regno_last_use_find, df_bb_regno_first_def_find):
Remove static and forward declaration.
(df_find_def, df_reg_used, df_bb_regno_last_def_find): New
functions.
* df.h (df_bb_regno_last_use_find, df_bb_regno_first_def_find,
df_bb_regno_last_def_find, df_find_def, df_reg_used): New
declarations.
* flags.h (flag_modulo_sched): New flag.
* opts.c (common_handle_option): Handle modulo-sched flag.
* params.def (max-sms-loop-number, sms-max-ii-factor,
sms-dfa-history, sms-loop-average-count-threshold): New
parameters.
* params.h (MAX_SMS_LOOP_NUMBER, SMS_MAX_II_FACTOR,
SMS_DFA_HISTORY, SMS_LOOP_AVERAGE_COUNT_THRESHOLD): New
parameters.
* passes.c ("sms", "sms-vcg"): New dumps.
(rest_of_handle_sched): Call sms_schedule.
* rtl.h (sms_schedule): New declaration.
* timevar.def (TV_SMS): New.
* toplev.c (flag_modulo_sched): Initialize.
(f_options): Handle -fmodulo-sched option.
* doc/invoke.texi: Document -fmodulo-sched & -dm options.
* doc/passes.texi: Document new SMS pass.
Co-Authored-By: Mostafa Hagog <mustafa@il.ibm.com>
From-SVN: r82235
+2004-05-25 Ayal Zaks <zaks@il.ibm.com>
+ Mostafa Hagog <mustafa@il.ibm.com>
+
+ * Makefile.in (modulo-sched.o, ddg.o): New.
+ * ddg.h, ddg.c, modulo-sched.c: New files.
+ * cfglayout.c (duplicate_insn_chain): Remove "static" and push
+ internals to "duplicate_insn".
+ (duplicate_insn): New function.
+ * cfglayout.h (duplicate_insn_chain, duplicate_insn): New
+ declarations.
+ * common.opt (fmodulo-sched): New flag.
+ * df.c (df_bb_regno_last_use_find, df_bb_regno_first_def_find):
+ Remove static and forward declaration.
+ (df_find_def, df_reg_used, df_bb_regno_last_def_find): New
+ functions.
+ * df.h (df_bb_regno_last_use_find, df_bb_regno_first_def_find,
+ df_bb_regno_last_def_find, df_find_def, df_reg_used): New
+ declarations.
+ * flags.h (flag_modulo_sched): New flag.
+ * opts.c (common_handle_option): Handle modulo-sched flag.
+ * params.def (max-sms-loop-number, sms-max-ii-factor,
+ sms-dfa-history, sms-loop-average-count-threshold): New
+ parameters.
+ * params.h (MAX_SMS_LOOP_NUMBER, SMS_MAX_II_FACTOR,
+ SMS_DFA_HISTORY, SMS_LOOP_AVERAGE_COUNT_THRESHOLD): New
+ parameters.
+ * passes.c ("sms", "sms-vcg"): New dumps.
+ (rest_of_handle_sched): Call sms_schedule.
+ * rtl.h (sms_schedule): New declaration.
+ * timevar.def (TV_SMS): New.
+ * toplev.c (flag_modulo_sched): Initialize.
+ (f_options): Handle -fmodulo-sched option.
+ * doc/invoke.texi: Document -fmodulo-sched & -dm options.
+ * doc/passes.texi: Document new SMS pass.
+
2004-05-25 Paolo Bonzini <bonzini@gnu.org>
* Makefile.in (OBJS): Add rtlhooks.o.
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \
- dbxout.o debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \
+ dbxout.o ddg.o \
+ debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \
insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o \
integrate.o intl.o jump.o langhooks.o lcm.o lists.o local-alloc.o \
- loop.o optabs.o options.o opts.o params.o postreload.o predict.o \
+ loop.o modulo-sched.o \
+ optabs.o options.o opts.o params.o postreload.o predict.o \
print-rtl.o print-tree.o value-prof.o var-tracking.o \
profile.o ra.o ra-build.o ra-colorize.o ra-debug.o ra-rewrite.o \
real.o recog.o reg-stack.o regclass.o regmove.o regrename.o \
regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) insn-config.h \
$(RECOG_H) output.h $(REGS_H) hard-reg-set.h flags.h function.h \
$(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) except.h reload.h
+ddg.o : ddg.c ddg.h $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) \
+ toplev.h $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) regs.h function.h flags.h \
+ insn-config.h insn-attr.h except.h $(RECOG_H) sched-int.h \
+ cfglayout.h cfgloop.h $(EXPR_H)
+modulo-sched.o : modulo-sched.c ddg.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(TARGET_H) toplev.h $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) regs.h function.h \
+   flags.h insn-config.h insn-attr.h except.h $(RECOG_H) sched-int.h \
+   cfglayout.h cfgloop.h $(EXPR_H) params.h $(COVERAGE_H)
haifa-sched.o : haifa-sched.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
sched-int.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H) $(TARGET_H)
void verify_insn_chain (void);
static void fixup_fallthru_exit_predecessor (void);
-static rtx duplicate_insn_chain (rtx, rtx);
static tree insn_scope (rtx);
static void update_unlikely_executed_notes (basic_block);
\f
return true;
}
-static rtx
+rtx
duplicate_insn_chain (rtx from, rtx to)
{
rtx insn, last;
extern void copy_bbs (basic_block *, unsigned, basic_block *,
edge *, unsigned, edge *, struct loop *);
extern bool scan_ahead_for_unlikely_executed_note (rtx);
+extern rtx duplicate_insn_chain (rtx, rtx);
Common RejectNegative Joined UInteger
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping
+fmodulo-sched
+Common
+Perform SMS based modulo scheduling before the first scheduling pass
+
fmove-all-movables
Common
Force all loop invariant computations out of loops
static int df_def_dominates_all_uses_p (struct df *, struct ref *def);
static int df_def_dominates_uses_p (struct df *, struct ref *def, bitmap);
-static struct ref *df_bb_regno_last_use_find (struct df *, basic_block,
- unsigned int);
-static struct ref *df_bb_regno_first_def_find (struct df *, basic_block,
- unsigned int);
static struct ref *df_bb_insn_regno_last_use_find (struct df *, basic_block,
rtx, unsigned int);
static struct ref *df_bb_insn_regno_first_def_find (struct df *, basic_block,
return 0;
}
+/* Look through the definitions recorded for INSN in the dataflow
+   object DF and return the reference whose register is rtx_equal_p
+   to REG.  Return NULL when no definition of INSN matches.  */
+
+struct ref *
+df_find_def (struct df *df, rtx insn, rtx reg)
+{
+  struct df_link *link = DF_INSN_DEFS (df, insn);
+
+  while (link)
+    {
+      struct ref *candidate = link->ref;
+
+      if (rtx_equal_p (DF_REF_REG (candidate), reg))
+        return candidate;
+      link = link->next;
+    }
+
+  return NULL;
+}
+
+/* Return 1 if one of the uses recorded for INSN in the dataflow object
+   DF refers to a register that is rtx_equal_p to REG, zero otherwise.
+   Only the use list of INSN is examined.  */
+
+int
+df_reg_used (struct df *df, rtx insn, rtx reg)
+{
+  struct df_link *link = DF_INSN_USES (df, insn);
+  int found = 0;
+
+  while (link && !found)
+    {
+      found = rtx_equal_p (DF_REF_REG (link->ref), reg) != 0;
+      link = link->next;
+    }
+
+  return found;
+}
static int
df_def_dominates_all_uses_p (struct df *df ATTRIBUTE_UNUSED, struct ref *def)
/* Return last use of REGNO within BB. */
-static struct ref *
+struct ref *
df_bb_regno_last_use_find (struct df *df, basic_block bb, unsigned int regno)
{
struct df_link *link;
/* Return first def of REGNO within BB. */
-static struct ref *
+struct ref *
df_bb_regno_first_def_find (struct df *df, basic_block bb, unsigned int regno)
{
struct df_link *link;
return 0;
}
+/* Return the last def of REGNO within BB, or NULL if BB contains no
+   def of REGNO.  DF is the dataflow object whose per-register def
+   chain is searched.  */
+struct ref *
+df_bb_regno_last_def_find (struct df *df, basic_block bb, unsigned int regno)
+{
+  struct df_link *link;
+  struct ref *last_def = NULL;
+
+  /* This assumes that the reg-def list is ordered such that for any
+     BB, the first def is found first, i.e. the defs belonging to BB
+     form one run in the chain.  However, since the BBs are not
+     ordered, the first def in the chain is not necessarily the first
+     def in the function.  */
+  for (link = df->regs[regno].defs; link; link = link->next)
+    {
+      struct ref *def = link->ref;
+
+      if (DF_REF_BB (def) == bb)
+        /* Still inside the desired block: remember the newest def.  */
+        last_def = def;
+      else if (last_def)
+        /* We just walked out of the desired block, so LAST_DEF is its
+           last def.  */
+        break;
+    }
+
+  /* LAST_DEF is only ever set from a def located in BB, so a block
+     without any def of REGNO yields NULL rather than an unrelated def
+     from the tail of the chain.  */
+  return last_def;
+}
/* Return first use of REGNO inside INSN within BB. */
static struct ref *
extern rtx df_bb_single_def_use_insn_find (struct df *, basic_block, rtx,
rtx);
+extern struct ref *df_bb_regno_last_use_find (struct df *, basic_block, unsigned int);
+
+extern struct ref *df_bb_regno_first_def_find (struct df *, basic_block, unsigned int);
+
+extern struct ref *df_bb_regno_last_def_find (struct df *, basic_block, unsigned int);
+
+extern struct ref *df_find_def (struct df *, rtx, rtx);
+
+extern int df_reg_used (struct df *, rtx, rtx);
/* Functions for debugging from GDB. */
-floop-optimize -fcrossjumping -fif-conversion -fif-conversion2 @gol
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol
-fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol
--fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
+-fmodulo-sched -fmove-all-movables -fnew-ra -fno-branch-count-reg @gol
-fno-default-inline -fno-defer-pop @gol
-fno-function-cse -fno-guess-branch-probability @gol
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
Dump after computing branch probabilities, to @file{@var{file}.12.bp}.
@item B
@opindex dB
-Dump after block reordering, to @file{@var{file}.31.bbro}.
+Dump after block reordering, to @file{@var{file}.32.bbro}.
@item c
@opindex dc
Dump after instruction combination, to the file @file{@var{file}.20.combine}.
Also dump after the second if conversion, to the file @file{@var{file}.21.ce2}.
@item d
@opindex dd
-Dump after branch target load optimization, to to @file{@var{file}.32.btl}.
-Also dump after delayed branch scheduling, to @file{@var{file}.36.dbr}.
+Dump after branch target load optimization, to @file{@var{file}.33.btl}.
+Also dump after delayed branch scheduling, to @file{@var{file}.37.dbr}.
@item D
@opindex dD
Dump all macro definitions, at the end of preprocessing, in addition to
normal output.
@item E
@opindex dE
-Dump after the third if conversion, to @file{@var{file}.30.ce3}.
+Dump after the third if conversion, to @file{@var{file}.31.ce3}.
@item f
@opindex df
Dump after control and data flow analysis, to @file{@var{file}.11.cfg}.
Dump after purging @code{ADDRESSOF} codes, to @file{@var{file}.07.addressof}.
@item g
@opindex dg
-Dump after global register allocation, to @file{@var{file}.25.greg}.
+Dump after global register allocation, to @file{@var{file}.26.greg}.
@item G
@opindex dG
Dump after GCSE, to @file{@var{file}.08.gcse}.
Dump after the first jump optimization, to @file{@var{file}.04.jump}.
@item k
@opindex dk
-Dump after conversion from registers to stack, to @file{@var{file}.34.stack}.
+Dump after conversion from registers to stack, to @file{@var{file}.35.stack}.
@item l
@opindex dl
-Dump after local register allocation, to @file{@var{file}.24.lreg}.
+Dump after local register allocation, to @file{@var{file}.25.lreg}.
@item L
@opindex dL
Dump after loop optimization passes, to @file{@var{file}.09.loop} and
@file{@var{file}.16.loop2}.
+@item m
+@opindex dm
+Dump after modulo scheduling, to @file{@var{file}.23.sms}.
@item M
@opindex dM
Dump after performing the machine dependent reorganization pass, to
-@file{@var{file}.35.mach}.
+@file{@var{file}.36.mach}.
@item n
@opindex dn
-Dump after register renumbering, to @file{@var{file}.29.rnreg}.
+Dump after register renumbering, to @file{@var{file}.30.rnreg}.
@item N
@opindex dN
Dump after the register move pass, to @file{@var{file}.22.regmove}.
@item o
@opindex do
-Dump after post-reload optimizations, to @file{@var{file}.26.postreload}.
+Dump after post-reload optimizations, to @file{@var{file}.27.postreload}.
@item r
@opindex dr
Dump after RTL generation, to @file{@var{file}.01.rtl}.
@item R
@opindex dR
-Dump after the second scheduling pass, to @file{@var{file}.33.sched2}.
+Dump after the second scheduling pass, to @file{@var{file}.34.sched2}.
@item s
@opindex ds
Dump after CSE (including the jump optimization that sometimes follows
CSE), to @file{@var{file}.06.cse}.
@item S
@opindex dS
-Dump after the first scheduling pass, to @file{@var{file}.23.sched}.
+Dump after the first scheduling pass, to @file{@var{file}.24.sched}.
@item t
@opindex dt
Dump after the second CSE pass (including the jump optimization that
Also dump after variable tracking, to @file{@var{file}.35.vartrack}.
@item w
@opindex dw
-Dump after the second flow pass, to @file{@var{file}.27.flow2}.
+Dump after the second flow pass, to @file{@var{file}.28.flow2}.
@item z
@opindex dz
-Dump after the peephole pass, to @file{@var{file}.28.peephole2}.
+Dump after the peephole pass, to @file{@var{file}.29.peephole2}.
@item Z
@opindex dZ
Dump after constructing the web, to @file{@var{file}.17.web}.
have distinct location, so using this option will result in non-conforming
behavior.
+@item -fmodulo-sched
+@opindex fmodulo-sched
+Perform swing modulo scheduling immediately before the first scheduling
+pass. This pass looks at innermost loops and reorders their
+instructions by overlapping different iterations.
+
@item -fnew-ra
@opindex fnew-ra
Use a graph coloring register allocator. Currently this option is meant
satisfy all users. What these modes are, and what they apply to are
completely target-specific. The source is located in @file{lcm.c}.
+@cindex modulo scheduling
+@cindex sms, swing, software pipelining
+@item Modulo scheduling
+
+This pass looks at innermost loops and reorders their instructions
+by overlapping different iterations. Modulo scheduling is performed
+immediately before instruction scheduling.
+The pass is located in @file{modulo-sched.c}.
+
@item Instruction scheduling
This pass looks for instructions whose output will not be available by
extern int flag_shared_data;
+/* Controls the activation of SMS modulo scheduling. */
+extern int flag_modulo_sched;
+
/* flag_schedule_insns means schedule insns within basic blocks (before
local_alloc).
flag_schedule_insns_after_reload means schedule insns after
case OPT_fsched_stalled_insns_dep_:
flag_sched_stalled_insns_dep = value;
break;
-
+    case OPT_fmodulo_sched:
+      /* Store VALUE rather than a constant so that the negated form
+         -fno-modulo-sched turns the pass off, as the neighbouring
+         cases do for their flags.  */
+      flag_modulo_sched = value;
+      break;
case OPT_fshared_data:
flag_shared_data = value;
break;
"The maximum number of unswitchings in a single loop",
3)
+DEFPARAM(PARAM_MAX_SMS_LOOP_NUMBER,
+ "max-sms-loop-number",
+ "Maximum number of loops to perform swing modulo scheduling on \
+ (mainly for debugging)",
+ -1)
+
+/* This parameter is used to tune SMS MAX II calculations. */
+DEFPARAM(PARAM_SMS_MAX_II_FACTOR,
+ "sms-max-ii-factor",
+ "A factor for tuning the upper bound that swing modulo scheduler uses \
+ for scheduling a loop",
+ 100)
+DEFPARAM(PARAM_SMS_DFA_HISTORY,
+ "sms-dfa-history",
+ "The number of cycles the swing modulo scheduler considers when \
+ checking conflicts using DFA",
+ 0)
+DEFPARAM(PARAM_SMS_LOOP_AVERAGE_COUNT_THRESHOLD,
+ "sms-loop-average-count-threshold",
+ "A threshold on the average loop count considered by the swing modulo \
+ scheduler",
+ 0)
+
DEFPARAM(HOT_BB_COUNT_FRACTION,
"hot-bb-count-fraction",
"Select fraction of the maximal count of repetitions of basic block in \
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
#define MAX_UNROLLED_INSNS \
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
+#define MAX_SMS_LOOP_NUMBER \
+ PARAM_VALUE (PARAM_MAX_SMS_LOOP_NUMBER)
+#define SMS_MAX_II_FACTOR \
+ PARAM_VALUE (PARAM_SMS_MAX_II_FACTOR)
+#define SMS_DFA_HISTORY \
+ PARAM_VALUE (PARAM_SMS_DFA_HISTORY)
+#define SMS_LOOP_AVERAGE_COUNT_THRESHOLD \
+ PARAM_VALUE (PARAM_SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
#define GLOBAL_VAR_THRESHOLD \
PARAM_VALUE (PARAM_GLOBAL_VAR_THRESHOLD)
#define MAX_ALIASED_VOPS \
DFI_combine,
DFI_ce2,
DFI_regmove,
+ DFI_sms,
DFI_sched,
DFI_lreg,
DFI_greg,
Remaining -d letters:
- " e m q "
+ " e q "
" K O Q WXY "
*/
{ "combine", 'c', 1, 0, 0 },
{ "ce2", 'C', 1, 0, 0 },
{ "regmove", 'N', 1, 0, 0 },
+ { "sms", 'm', 0, 0, 0 },
{ "sched", 'S', 1, 0, 0 },
{ "lreg", 'l', 1, 0, 0 },
{ "greg", 'g', 1, 0, 0 },
static void
rest_of_handle_sched (tree decl, rtx insns)
{
+ timevar_push (TV_SMS);
+ if (optimize > 0 && flag_modulo_sched)
+ {
+
+ /* Perform SMS modulo scheduling. */
+ open_dump_file (DFI_sms, decl);
+
+ /* We want to be able to create new pseudos. */
+ no_new_pseudos = 0;
+ sms_schedule (dump_file);
+ close_dump_file (DFI_sms, print_rtl, get_insns ());
+
+
+ /* Update the life information, because we add pseudos. */
+ max_regno = max_reg_num ();
+ allocate_reg_info (max_regno, FALSE, FALSE);
+ update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
+ (PROP_DEATH_NOTES
+ | PROP_KILL_DEAD_CODE
+ | PROP_SCAN_DEAD_CODE));
+ no_new_pseudos = 1;
+ }
+ timevar_pop (TV_SMS);
timevar_push (TV_SCHED);
/* Print function header into sched dump now
/* In ra.c. */
extern void reg_alloc (void);
+/* In modulo-sched.c. */
+#ifdef BUFSIZ
+extern void sms_schedule (FILE *);
+#endif
\f
struct rtl_hooks
{
DEFTIMEVAR (TV_IFCVT , "if-conversion")
DEFTIMEVAR (TV_REGMOVE , "regmove")
DEFTIMEVAR (TV_MODE_SWITCH , "mode switching")
+DEFTIMEVAR (TV_SMS , "sms modulo scheduling")
DEFTIMEVAR (TV_SCHED , "scheduling")
DEFTIMEVAR (TV_LOCAL_ALLOC , "local alloc")
DEFTIMEVAR (TV_GLOBAL_ALLOC , "global alloc")
int flag_sched_stalled_insns = 0;
int flag_sched_stalled_insns_dep = 1;
+/* The following flag controls the modulo scheduling activation. */
+int flag_modulo_sched = 0;
+
int flag_single_precision_constant;
/* flag_branch_on_count_reg means try to replace add-1,compare,branch tupple
{"sched-stalled-insns-dep", &flag_sched_stalled_insns_dep, 1 },
{"sched2-use-superblocks", &flag_sched2_use_superblocks, 1 },
{"sched2-use-traces", &flag_sched2_use_traces, 1 },
+ {"modulo-sched", &flag_modulo_sched, 1 },
{"branch-count-reg",&flag_branch_on_count_reg, 1 },
{"pic", &flag_pic, 1 },
{"PIC", &flag_pic, 2 },