From 5e96277660887473bdc8b1953fcc35aedd092e3a Mon Sep 17 00:00:00 2001 From: Zdenek Dvorak Date: Sun, 20 Jun 2004 23:31:32 +0200 Subject: [PATCH] loop-invariant.c: New file. * loop-invariant.c: New file. * Makefile.in (loop-invariant.o): New. * cfgloop.h (global_cost_for_size, init_set_costs, move_loop_invariants): Declare. * cfgloopanal.c (seq_cost, init_set_costs, global_cost_for_size): New functions. (avail_regs, res_regs, small_cost, pres_cost, spill_cost): New variables. * common.opt (floop-optimize2, fmove-loop-invariants): New options. * loop-init.c (loop_optimizer_init): Call init_set_costs. * passes.c (rest_of_handle_loop2): Call move_loop_invariants. (rest_of_compilation): Check flag_loop_optimize2. * toplev.c (process_options): Handle flag_loop_optimize2. * doc/invoke.texi (-floop-optimize2, -fmove-loop-invariants): Document. * doc/passes.texi (loop-invariant.c): Document. From-SVN: r83419 --- gcc/ChangeLog | 18 + gcc/Makefile.in | 5 +- gcc/cfgloop.h | 6 + gcc/cfgloopanal.c | 89 +++++ gcc/common.opt | 8 + gcc/doc/invoke.texi | 13 +- gcc/doc/passes.texi | 4 + gcc/loop-init.c | 7 + gcc/loop-invariant.c | 933 +++++++++++++++++++++++++++++++++++++++++++ gcc/passes.c | 9 +- gcc/toplev.c | 13 + 11 files changed, 1101 insertions(+), 4 deletions(-) create mode 100644 gcc/loop-invariant.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 20ce45ee287..3b897aeb177 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2004-06-20 Zdenek Dvorak + + * loop-invariant.c: New file. + * Makefile.in (loop-invariant.o): New. + * cfgloop.h (global_cost_for_size, init_set_costs, + move_loop_invariants): Declare. + * cfgloopanal.c (seq_cost, init_set_costs, global_cost_for_size): New + functions. + (avail_regs, res_regs, small_cost, pres_cost, spill_cost): New + variables. + * common.opt (floop-optimize2, fmove-loop-invariants): New options. + * loop-init.c (loop_optimizer_init): Call init_set_costs. + * passes.c (rest_of_handle_loop2): Call move_loop_invariants. 
+ (rest_of_compilation): Check flag_loop_optimize2. + * toplev.c (process_options): Handle flag_loop_optimize2. + * doc/invoke.texi (-floop-optimize2, -fmove-loop-invariants): Document. + * doc/passes.texi (loop-invariant.c): Document. + 2004-06-20 Zdenek Dvorak * tree-ssa-pre.c (compute_antic): Keep BB_VISITED flag zeroed. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 6c21b666a55..020f4bcd165 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -897,7 +897,7 @@ OBJS-common = \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \ cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \ - dbxout.o ddg.o \ + dbxout.o ddg.o loop-invariant.o \ debug.o df.o diagnostic.o dojump.o dominance.o loop-doloop.o \ dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \ expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \ @@ -1952,6 +1952,9 @@ cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H) loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H) +loop-invariant.o : loop-invariant.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \ + $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H) \ + function.h flags.h df.h cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(CFGLAYOUT_H) output.h coretypes.h $(TM_H) loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index e3ac448f371..d73c99b240c 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -411,6 +411,11 @@ simple_loop_desc (struct loop *loop) return loop->aux; } +/* Register pressure estimation for induction variable optimizations & loop + invariant motion. 
*/ +extern unsigned global_cost_for_size (unsigned, unsigned, unsigned); +extern void init_set_costs (void); + /* Loop optimizer initialization. */ extern struct loops *loop_optimizer_init (FILE *); extern void loop_optimizer_finalize (struct loops *, FILE *); @@ -427,5 +432,6 @@ enum extern void unroll_and_peel_loops (struct loops *, int); extern void doloop_optimize_loops (struct loops *); +extern void move_loop_invariants (struct loops *); #endif /* GCC_CFGLOOP_H */ diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index 358bfa765b8..074574fc029 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -474,3 +474,92 @@ get_loop_level (const struct loop *loop) } return mx; } + +/* Returns estimate on cost of computing SEQ. */ + +static unsigned +seq_cost (rtx seq) +{ + unsigned cost = 0; + rtx set; + + for (; seq; seq = NEXT_INSN (seq)) + { + set = single_set (seq); + if (set) + cost += rtx_cost (set, SET); + else + cost++; + } + + return cost; +} + +/* The properties of the target. */ + +static unsigned avail_regs; /* Number of available registers. */ +static unsigned res_regs; /* Number of reserved registers. */ +static unsigned small_cost; /* The cost for register when there is a free one. */ +static unsigned pres_cost; /* The cost for register when there are not too many + free ones. */ +static unsigned spill_cost; /* The cost for register when we need to spill. */ + +/* Initialize the constants for computing set costs. */ + +void +init_set_costs (void) +{ + rtx seq; + rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER); + rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1); + rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2); + rtx mem = validize_mem (gen_rtx_MEM (SImode, addr)); + unsigned i; + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) + && !fixed_regs[i]) + avail_regs++; + + res_regs = 3; + + /* These are really just heuristic values. 
*/ + + start_sequence (); + emit_move_insn (reg1, reg2); + seq = get_insns (); + end_sequence (); + small_cost = seq_cost (seq); + pres_cost = 2 * small_cost; + + start_sequence (); + emit_move_insn (mem, reg1); + emit_move_insn (reg2, mem); + seq = get_insns (); + end_sequence (); + spill_cost = seq_cost (seq); +} + +/* Calculates cost for having SIZE new loop global variables. REGS_USED is the + number of global registers used in loop. N_USES is the number of relevant + variable uses. */ + +unsigned +global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses) +{ + unsigned regs_needed = regs_used + size; + unsigned cost = 0; + + if (regs_needed + res_regs <= avail_regs) + cost += small_cost * size; + else if (regs_needed <= avail_regs) + cost += pres_cost * size; + else + { + cost += pres_cost * size; + cost += spill_cost * n_uses * (regs_needed - avail_regs) / regs_needed; + } + + return cost; +} + diff --git a/gcc/common.opt b/gcc/common.opt index b6413817dd2..14e29e0a222 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -447,6 +447,10 @@ floop-optimize Common Report Var(flag_loop_optimize) Perform loop optimizations +floop-optimize2 +Common Report Var(flag_loop_optimize2) +Perform loop optimizations using the new loop optimizer + fmath-errno Common Report Var(flag_errno_math) Init(1) Set errno after built-in math functions @@ -475,6 +479,10 @@ fmove-all-movables Common Report Var(flag_move_all_movables) Force all loop invariant computations out of loops +fmove-loop-invariants +Common Report Var(flag_move_loop_invariants) +Move loop invariant computations out of loops + fmudflap Common RejectNegative Report Var(flag_mudflap) Add mudflap bounds-checking instrumentation for single-threaded program. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 20b4949ab76..2dd2b8239d7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -290,7 +290,7 @@ in the following sections. 
-finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol -fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol -fmodulo-sched -fmove-all-movables -fnew-ra -fno-branch-count-reg @gol --fno-default-inline -fno-defer-pop @gol +-fno-default-inline -fno-defer-pop -floop-optimize2 -fmove-loop-invariants @gol -fno-function-cse -fno-guess-branch-probability @gol -fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol -funsafe-math-optimizations -ffinite-math-only @gol @@ -4191,6 +4191,12 @@ well. Enabled at levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}. +@item -floop-optimize2 +@opindex floop-optimize2 +Perform loop optimizations using the new loop optimizer. The optimizations +(loop unrolling, peeling and unswitching, loop invariant motion) are enabled +by separate flags. + @item -fcrossjumping @opindex crossjumping Perform cross-jumping transformation. This transformation unifies equivalent code and save code size. The @@ -4922,6 +4928,11 @@ roll much (from profile feedback). It also turns on complete loop peeling Enabled with @option{-fprofile-use}. +@item -fmove-loop-invariants +@opindex fmove-loop-invariants +Enables the loop invariant motion pass in the new loop optimizer. Enabled +at level @option{-O1}. + @item -funswitch-loops @opindex funswitch-loops Move branches with loop invariant conditions out of the loop, with duplicates diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 767ec138a13..94b9ae5b23a 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -520,6 +520,8 @@ Its source files are @file{loop.c} and @file{unroll.c}, plus the header @file{loop.h} used for communication between them. Loop unrolling uses some functions in @file{integrate.c} and the header @file{integrate.h}. Loop dependency analysis routines are contained in @file{dependence.c}. +This pass is seriously out-of-date and is supposed to be replaced by +a new one described below in the near future.
A second loop optimization pass takes care of basic block level optimizations---unrolling, peeling and unswitching loops. The source @@ -527,6 +529,8 @@ files are @file{cfgloopanal.c} and @file{cfgloopmanip.c} containing generic loop analysis and manipulation code, @file{loop-init.c} with initialization and finalization code, @file{loop-unswitch.c} for loop unswitching and @file{loop-unroll.c} for loop unrolling and peeling. +It also contains a separate loop invariant motion pass implemented in +@file{loop-invariant.c}. @item Jump bypassing diff --git a/gcc/loop-init.c b/gcc/loop-init.c index eed3e195bd5..ff441e8c56d 100644 --- a/gcc/loop-init.c +++ b/gcc/loop-init.c @@ -35,6 +35,13 @@ loop_optimizer_init (FILE *dumpfile) { struct loops *loops = xcalloc (1, sizeof (struct loops)); edge e; + static bool first_time = true; + + if (first_time) + { + first_time = false; + init_set_costs (); + } /* Avoid annoying special cases of edges going to exit block. */ diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c new file mode 100644 index 00000000000..f839ae51635 --- /dev/null +++ b/gcc/loop-invariant.c @@ -0,0 +1,933 @@ +/* Rtl-level loop invariant motion. + Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ + +/* This implements the loop invariant motion pass. 
It is very simple + (no calls, libcalls, etc.). This should be sufficient to clean up things like + address arithmetic -- other more complicated invariants should be + eliminated on tree level either in tree-ssa-loop-im.c or in tree-ssa-pre.c. + + We proceed loop by loop -- it is simpler than trying to handle things + globally and should not lose much. First we inspect all sets inside loop + and create a dependency graph on insns (saying "to move this insn, you must + also move the following insns"). + + We then need to determine what to move. We estimate the number of registers + used and move as many invariants as possible while we still have enough free + registers. We prefer the expensive invariants. + + Then we move the selected invariants out of the loop, creating new + temporaries for them if necessary. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "cfgloop.h" +#include "expr.h" +#include "output.h" +#include "function.h" +#include "flags.h" +#include "df.h" + +/* The data stored for the loop. */ + +struct loop_data +{ + struct loop *outermost_exit; /* The outermost exit of the loop. */ + bool has_call; /* True if the loop contains a call. */ +}; + +#define LOOP_DATA(LOOP) ((struct loop_data *) (LOOP)->aux) + +/* The description of a use. */ + +struct use +{ + rtx *pos; /* Position of the use. */ + rtx insn; /* The insn in which the use occurs. */ + + struct use *next; /* Next use in the list. */ +}; + +/* The description of a def. */ + +struct def +{ + struct use *uses; /* The list of uses that are uniquely reached + by it. */ + unsigned n_uses; /* Number of such uses. */ + unsigned invno; /* The corresponding invariant. */ +}; + +/* The data stored for each invariant. */ + +struct invariant +{ + /* The number of the invariant. */ + unsigned invno; + + /* Whether we already processed the invariant.
*/ + bool processed; + + /* The definition of the invariant. */ + struct def *def; + + /* The insn in which it is defined. */ + rtx insn; + + /* Whether it is always executed. */ + bool always_executed; + + /* Whether to move the invariant. */ + bool move; + + /* Cost of the invariant. */ + unsigned cost; + + /* The invariants it depends on. */ + bitmap depends_on; + + /* Used for detecting already visited invariants during determining + costs of movements. */ + unsigned stamp; +}; + +/* The actual stamp for marking already visited invariants during determining + costs of movements. */ + +static unsigned actual_stamp; + +/* The invariants. */ + +static varray_type invariants; + +/* Test for possibility of invariantness of X. */ + +static bool +check_maybe_invariant (rtx x) +{ + enum rtx_code code = GET_CODE (x); + int i, j; + const char *fmt; + + switch (code) + { + case CONST_INT: + case CONST_DOUBLE: + case SYMBOL_REF: + case CONST: + case LABEL_REF: + return true; + + case PC: + case CC0: + case UNSPEC_VOLATILE: + case CALL: + return false; + + case REG: + return true; + + case MEM: + /* Load/store motion is done elsewhere. ??? Perhaps also add it here? + It should not be hard, and might be faster than "elsewhere". */ + + /* Just handle the most trivial case where we load from an unchanging + location (most importantly, pic tables). */ + if (RTX_UNCHANGING_P (x)) + break; + + return false; + + case ASM_OPERANDS: + /* Don't mess with insns declared volatile.
*/ + if (MEM_VOLATILE_P (x)) + return false; + break; + + default: + break; + } + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if (!check_maybe_invariant (XEXP (x, i))) + return false; + } + else if (fmt[i] == 'E') + { + for (j = 0; j < XVECLEN (x, i); j++) + if (!check_maybe_invariant (XVECEXP (x, i, j))) + return false; + } + } + + return true; +} + +/* Determines the basic blocks inside LOOP that are always executed and + stores their bitmap to ALWAYS_REACHED. MAY_EXIT is a bitmap of + basic blocks that may either exit the loop, or contain the call that + does not have to return. BODY is body of the loop obtained by + get_loop_body_in_dom_order. */ + +static void +compute_always_reached (struct loop *loop, basic_block *body, + bitmap may_exit, bitmap always_reached) +{ + unsigned i; + + for (i = 0; i < loop->num_nodes; i++) + { + if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i])) + bitmap_set_bit (always_reached, i); + + if (bitmap_bit_p (may_exit, i)) + return; + } +} + +/* Finds exits out of the LOOP with body BODY. Marks blocks in that we may + exit the loop by cfg edge to HAS_EXIT and MAY_EXIT. In MAY_EXIT + additionally mark blocks that may exit due to a call. 
*/ + +static void +find_exits (struct loop *loop, basic_block *body, + bitmap may_exit, bitmap has_exit) +{ + unsigned i; + edge e; + struct loop *outermost_exit = loop, *aexit; + bool has_call = false; + rtx insn; + + for (i = 0; i < loop->num_nodes; i++) + { + if (body[i]->loop_father == loop) + { + FOR_BB_INSNS (body[i], insn) + { + if (GET_CODE (insn) == CALL_INSN + && !CONST_OR_PURE_CALL_P (insn)) + { + has_call = true; + bitmap_set_bit (may_exit, i); + break; + } + } + + for (e = body[i]->succ; e; e = e->succ_next) + { + if (flow_bb_inside_loop_p (loop, e->dest)) + continue; + + bitmap_set_bit (may_exit, i); + bitmap_set_bit (has_exit, i); + outermost_exit = find_common_loop (outermost_exit, + e->dest->loop_father); + } + continue; + } + + /* Use the data stored for the subloop to decide whether we may exit + through it. It is sufficient to do this for header of the loop, + as other basic blocks inside it must be dominated by it. */ + if (body[i]->loop_father->header != body[i]) + continue; + + if (LOOP_DATA (body[i]->loop_father)->has_call) + { + has_call = true; + bitmap_set_bit (may_exit, i); + } + aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit; + if (aexit != loop) + { + bitmap_set_bit (may_exit, i); + bitmap_set_bit (has_exit, i); + + if (flow_loop_nested_p (aexit, outermost_exit)) + outermost_exit = aexit; + } + } + + loop->aux = xcalloc (1, sizeof (struct loop_data)); + LOOP_DATA (loop)->outermost_exit = outermost_exit; + LOOP_DATA (loop)->has_call = has_call; +} + +/* Check whether we may assign a value to X from a register. */ + +static bool +may_assign_reg_p (rtx x) +{ + return can_copy_p (GET_MODE (x)); +} + +/* Finds definitions that may correspond to invariants in LOOP with body BODY. + DF is the dataflow object. 
*/ + +static void +find_defs (struct loop *loop, basic_block *body, struct df *df) +{ + unsigned i; + bitmap blocks = BITMAP_XMALLOC (); + + for (i = 0; i < loop->num_nodes; i++) + bitmap_set_bit (blocks, body[i]->index); + + df_analyze_subcfg (df, blocks, DF_UD_CHAIN | DF_HARD_REGS | DF_EQUIV_NOTES); + BITMAP_XFREE (blocks); +} + +/* Creates a new invariant for definition DEF in INSN, depending on invariants + in DEPENDS_ON. ALWAYS_EXECUTED is true if the insn is always executed, + unless the program ends due to a function call. */ + +static void +create_new_invariant (struct def *def, rtx insn, bitmap depends_on, + bool always_executed) +{ + struct invariant *inv = xmalloc (sizeof (struct invariant)); + rtx set = single_set (insn); + + inv->def = def; + inv->always_executed = always_executed; + inv->depends_on = depends_on; + + /* If the set is simple, usually by moving it we move the whole store out of + the loop. Otherwise we save only cost of the computation. */ + if (def) + inv->cost = rtx_cost (set, SET); + else + inv->cost = rtx_cost (SET_SRC (set), SET); + + inv->move = false; + inv->processed = false; + inv->stamp = 0; + inv->insn = insn; + + inv->invno = VARRAY_ACTIVE_SIZE (invariants); + if (def) + def->invno = inv->invno; + VARRAY_PUSH_GENERIC_PTR_NOGC (invariants, inv); + + if (dump_file) + { + fprintf (dump_file, + "Set in insn %d is invariant (%d), cost %d, depends on ", + INSN_UID (insn), inv->invno, inv->cost); + dump_bitmap (dump_file, inv->depends_on); + } +} + +/* Record USE at DEF. */ + +static void +record_use (struct def *def, rtx *use, rtx insn) +{ + struct use *u = xmalloc (sizeof (struct use)); + + if (GET_CODE (*use) == SUBREG) + use = &SUBREG_REG (*use); + if (!REG_P (*use)) + abort (); + + u->pos = use; + u->insn = insn; + u->next = def->uses; + def->uses = u; + def->n_uses++; +} + +/* Finds the invariants INSN depends on and store them to the DEPENDS_ON + bitmap. DF is the dataflow object. 
*/ + +static bool +check_dependencies (rtx insn, struct df *df, bitmap depends_on) +{ + struct df_link *uses, *defs; + struct ref *use, *def; + basic_block bb = BLOCK_FOR_INSN (insn), def_bb; + struct def *def_data; + + for (uses = DF_INSN_USES (df, insn); uses; uses = uses->next) + { + use = uses->ref; + + defs = DF_REF_CHAIN (use); + if (!defs) + continue; + + if (defs->next) + return false; + + def = defs->ref; + def_data = DF_REF_DATA (def); + if (!def_data) + return false; + + def_bb = DF_REF_BB (def); + if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb)) + return false; + + bitmap_set_bit (depends_on, def_data->invno); + } + + return true; +} + +/* Finds invariant in INSN. ALWAYS_REACHED is true if the insn is always + executed. ALWAYS_EXECUTED is true if the insn is always executed, + unless the program ends due to a function call. DF is the dataflow + object. */ + +static void +find_invariant_insn (rtx insn, bool always_reached, bool always_executed, + struct df *df) +{ + struct ref *ref; + struct def *def; + bitmap depends_on; + rtx set, dest; + bool simple = true; + + /* Until we get rid of LIBCALLS. */ + if (find_reg_note (insn, REG_RETVAL, NULL_RTX) + || find_reg_note (insn, REG_LIBCALL, NULL_RTX) + || find_reg_note (insn, REG_NO_CONFLICT, NULL_RTX)) + return; + + set = single_set (insn); + if (!set) + return; + dest = SET_DEST (set); + + if (GET_CODE (dest) != REG + || HARD_REGISTER_P (dest)) + simple = false; + + if (!check_maybe_invariant (SET_SRC (set)) + || !may_assign_reg_p (SET_DEST (set))) + return; + + if (may_trap_p (PATTERN (insn))) + { + if (!always_reached) + return; + + /* Unless the exceptions are handled, the behavior is undefined + if the trap occurs. 
*/ + if (flag_non_call_exceptions) + return; + } + + depends_on = BITMAP_XMALLOC (); + if (!check_dependencies (insn, df, depends_on)) + { + BITMAP_XFREE (depends_on); + return; + } + + if (simple) + { + ref = df_find_def (df, insn, dest); + def = xcalloc (1, sizeof (struct def)); + DF_REF_DATA (ref) = def; + } + else + def = NULL; + + create_new_invariant (def, insn, depends_on, always_executed); +} + +/* Record registers used in INSN that have an unique invariant definition. + DF is the dataflow object. */ + +static void +record_uses (rtx insn, struct df *df) +{ + struct df_link *uses, *defs; + struct ref *use, *def; + basic_block bb = BLOCK_FOR_INSN (insn), def_bb; + + for (uses = DF_INSN_USES (df, insn); uses; uses = uses->next) + { + use = uses->ref; + + defs = DF_REF_CHAIN (use); + if (!defs || defs->next) + continue; + def = defs->ref; + if (!DF_REF_DATA (def)) + continue; + + def_bb = DF_REF_BB (def); + if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb)) + continue; + + record_use (DF_REF_DATA (def), DF_REF_LOC (use), DF_REF_INSN (use)); + } +} + +/* Finds invariants in INSN. ALWAYS_REACHED is true if the insn is always + executed. ALWAYS_EXECUTED is true if the insn is always executed, + unless the program ends due to a function call. DF is the dataflow + object. */ + +static void +find_invariants_insn (rtx insn, bool always_reached, bool always_executed, + struct df *df) +{ + find_invariant_insn (insn, always_reached, always_executed, df); + record_uses (insn, df); +} + +/* Finds invariants in basic block BB. ALWAYS_REACHED is true if the + basic block is always executed. ALWAYS_EXECUTED is true if the basic + block is always executed, unless the program ends due to a function + call. DF is the dataflow object. 
*/ + +static void +find_invariants_bb (basic_block bb, bool always_reached, bool always_executed, + struct df *df) +{ + rtx insn; + + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + continue; + + find_invariants_insn (insn, always_reached, always_executed, df); + + if (always_reached + && GET_CODE (insn) == CALL_INSN + && !CONST_OR_PURE_CALL_P (insn)) + always_reached = false; + } +} + +/* Finds invariants in LOOP with body BODY. ALWAYS_REACHED is the bitmap of + basic blocks in BODY that are always executed. ALWAYS_EXECUTED is the + bitmap of basic blocks in BODY that are always executed unless the program + ends due to a function call. DF is the dataflow object. */ + +static void +find_invariants_body (struct loop *loop, basic_block *body, + bitmap always_reached, bitmap always_executed, + struct df *df) +{ + unsigned i; + + for (i = 0; i < loop->num_nodes; i++) + find_invariants_bb (body[i], + bitmap_bit_p (always_reached, i), + bitmap_bit_p (always_executed, i), + df); +} + +/* Finds invariants in LOOP. DF is the dataflow object. */ + +static void +find_invariants (struct loop *loop, struct df *df) +{ + bitmap may_exit = BITMAP_XMALLOC (); + bitmap always_reached = BITMAP_XMALLOC (); + bitmap has_exit = BITMAP_XMALLOC (); + bitmap always_executed = BITMAP_XMALLOC (); + basic_block *body = get_loop_body_in_dom_order (loop); + + find_exits (loop, body, may_exit, has_exit); + compute_always_reached (loop, body, may_exit, always_reached); + compute_always_reached (loop, body, has_exit, always_executed); + + find_defs (loop, body, df); + find_invariants_body (loop, body, always_reached, always_executed, df); + + BITMAP_XFREE (always_reached); + BITMAP_XFREE (always_executed); + BITMAP_XFREE (may_exit); + BITMAP_XFREE (has_exit); + free (body); +} + +/* Frees a list of uses USE. 
*/ + +static void +free_use_list (struct use *use) +{ + struct use *next; + + for (; use; use = next) + { + next = use->next; + free (use); + } +} + +/* Calculates cost and number of registers needed for moving invariant INV + out of the loop and stores them to *COST and *REGS_NEEDED. */ + +static void +get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed) +{ + int acomp_cost; + unsigned aregs_needed; + unsigned depno; + struct invariant *dep; + + *comp_cost = 0; + *regs_needed = 0; + if (inv->move + || inv->stamp == actual_stamp) + return; + inv->stamp = actual_stamp; + + (*regs_needed)++; + (*comp_cost) += inv->cost; + + EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, + { + dep = VARRAY_GENERIC_PTR_NOGC (invariants, depno); + + get_inv_cost (dep, &acomp_cost, &aregs_needed); + + if (aregs_needed + /* We need to check always_executed, since if the original value of + the invariant may be preserved, we may need to keep it in a + separate register. TODO check whether the register has an + use outside of the loop. */ + && dep->always_executed + && !dep->def->uses->next) + { + /* If this is a single use, after moving the dependency we will not + need a new register. */ + aregs_needed--; + } + + (*regs_needed) += aregs_needed; + (*comp_cost) += acomp_cost; + }); +} + +/* Calculates gain for eliminating invariant INV. REGS_USED is the number + of registers used in the loop, N_INV_USES is the number of uses of + invariants, NEW_REGS is the number of new variables already added due to + the invariant motion. The number of registers needed for it is stored in + *REGS_NEEDED. 
*/ + +static int +gain_for_invariant (struct invariant *inv, unsigned *regs_needed, + unsigned new_regs, unsigned regs_used, unsigned n_inv_uses) +{ + int comp_cost, size_cost; + + get_inv_cost (inv, &comp_cost, regs_needed); + actual_stamp++; + + size_cost = (global_cost_for_size (new_regs + *regs_needed, + regs_used, n_inv_uses) + - global_cost_for_size (new_regs, regs_used, n_inv_uses)); + + return comp_cost - size_cost; +} + +/* Finds invariant with best gain for moving. Returns the gain, stores + the invariant in *BEST and number of registers needed for it to + *REGS_NEEDED. REGS_USED is the number of registers used in + the loop, N_INV_USES is the number of uses of invariants. NEW_REGS + is the number of new variables already added due to invariant motion. */ + +static int +best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, + unsigned new_regs, unsigned regs_used, + unsigned n_inv_uses) +{ + struct invariant *inv; + int gain = 0, again; + unsigned aregs_needed, invno; + + for (invno = 0; invno < VARRAY_ACTIVE_SIZE (invariants); invno++) + { + inv = VARRAY_GENERIC_PTR_NOGC (invariants, invno); + if (inv->move) + continue; + + again = gain_for_invariant (inv, &aregs_needed, + new_regs, regs_used, n_inv_uses); + if (again > gain) + { + gain = again; + *best = inv; + *regs_needed = aregs_needed; + } + } + + return gain; +} + +/* Marks invariant INVNO and all its dependencies for moving. */ + +static void +set_move_mark (unsigned invno) +{ + struct invariant *inv = VARRAY_GENERIC_PTR_NOGC (invariants, invno); + + if (inv->move) + return; + inv->move = true; + + if (dump_file) + fprintf (dump_file, "Decided to move invariant %d\n", invno); + + EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, invno, set_move_mark (invno)); +} + +/* Determines which invariants to move. DF is the dataflow object. 
*/ + +static void +find_invariants_to_move (struct df *df) +{ + unsigned i, regs_used, n_inv_uses, regs_needed = 0, new_regs; + struct invariant *inv = NULL; + + if (flag_move_all_movables) + { + /* This is easy & stupid. */ + for (i = 0; i < VARRAY_ACTIVE_SIZE (invariants); i++) + { + inv = VARRAY_GENERIC_PTR_NOGC (invariants, i); + inv->move = true; + } + return; + } + + if (!VARRAY_ACTIVE_SIZE (invariants)) + return; + + /* Now something slightly more involved. First estimate the number of used + registers. */ + n_inv_uses = 0; + + /* We do not really do a good job in this estimation; put some initial bound + here to stand for induction variables etc. that we do not detect. */ + regs_used = 2; + + for (i = 0; i < df->n_regs; i++) + { + if (!DF_REGNO_FIRST_DEF (df, i) && DF_REGNO_LAST_USE (df, i)) + { + /* This is a value that is used but not changed inside loop. */ + regs_used++; + } + } + + for (i = 0; i < VARRAY_ACTIVE_SIZE (invariants); i++) + { + inv = VARRAY_GENERIC_PTR_NOGC (invariants, i); + if (inv->def) + n_inv_uses += inv->def->n_uses; + } + + new_regs = 0; + while (best_gain_for_invariant (&inv, &regs_needed, + new_regs, regs_used, n_inv_uses) > 0) + { + set_move_mark (inv->invno); + new_regs += regs_needed; + } +} + +/* Move invariant INVNO out of the LOOP. DF is the dataflow object. */ + +static void +move_invariant_reg (struct loop *loop, unsigned invno, struct df *df) +{ + struct invariant *inv = VARRAY_GENERIC_PTR_NOGC (invariants, invno); + unsigned i; + basic_block preheader = loop_preheader_edge (loop)->src; + rtx reg, set; + struct use *use; + + if (inv->processed) + return; + inv->processed = true; + + if (inv->depends_on) + { + EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, i, + { + move_invariant_reg (loop, i, df); + }); + } + + /* Move the set out of the loop.
If the set is always executed (we could + omit this condition if we know that the register is unused outside of the + loop, but it does not seem worth finding out) and it has no uses that + would not be dominated by it, we may just move it (TODO). Otherwise we + need to create a temporary register. */ + set = single_set (inv->insn); + reg = gen_reg_rtx (GET_MODE (SET_DEST (set))); + df_pattern_emit_after (df, gen_move_insn (SET_DEST (set), reg), + BLOCK_FOR_INSN (inv->insn), inv->insn); + SET_DEST (set) = reg; + reorder_insns (inv->insn, inv->insn, BB_END (preheader)); + df_insn_modify (df, preheader, inv->insn); + + /* Replace the uses we know to be dominated. It saves work for copy + propagation, and also it is necessary so that dependent invariants + are computed right. */ + if (inv->def) + { + for (use = inv->def->uses; use; use = use->next) + { + *use->pos = reg; + df_insn_modify (df, BLOCK_FOR_INSN (use->insn), use->insn); + } + } +} + +/* Move selected invariant out of the LOOP. Newly created regs are marked + in TEMPORARY_REGS. DF is the dataflow object. */ + +static void +move_invariants (struct loop *loop, struct df *df) +{ + struct invariant *inv; + unsigned i; + + for (i = 0; i < VARRAY_ACTIVE_SIZE (invariants); i++) + { + inv = VARRAY_GENERIC_PTR_NOGC (invariants, i); + if (inv->move) + move_invariant_reg (loop, i, df); + } +} + +/* Initializes invariant motion data. */ + +static void +init_inv_motion_data (void) +{ + actual_stamp = 1; + + if (!invariants) + VARRAY_GENERIC_PTR_NOGC_INIT (invariants, 100, "invariants"); +} + +/* Frees the data allocated by invariant motion. DF is the dataflow + object. 
*/ + +static void +free_inv_motion_data (struct df *df) +{ + unsigned i; + struct def *def; + struct invariant *inv; + + for (i = 0; i < df->n_defs; i++) + { + if (!df->defs[i]) + continue; + + def = DF_REF_DATA (df->defs[i]); + if (!def) + continue; + + free_use_list (def->uses); + free (def); + DF_REF_DATA (df->defs[i]) = NULL; + } + + for (i = 0; i < VARRAY_ACTIVE_SIZE (invariants); i++) + { + inv = VARRAY_GENERIC_PTR_NOGC (invariants, i); + BITMAP_XFREE (inv->depends_on); + free (inv); + } + VARRAY_POP_ALL (invariants); +} + +/* Move the invariants out of the LOOP. DF is the dataflow object. */ + +static void +move_single_loop_invariants (struct loop *loop, struct df *df) +{ + init_inv_motion_data (); + + find_invariants (loop, df); + find_invariants_to_move (df); + move_invariants (loop, df); + + free_inv_motion_data (df); +} + +/* Releases the auxiliary data for LOOP. */ + +static void +free_loop_data (struct loop *loop) +{ + struct loop_data *data = LOOP_DATA (loop); + + free (data); + loop->aux = NULL; +} + +/* Move the invariants out of the LOOPS. */ + +void +move_loop_invariants (struct loops *loops) +{ + struct loop *loop; + unsigned i; + struct df *df = df_init (); + + /* Process the loops, innermost first. 
*/ + loop = loops->tree_root; + while (loop->inner) + loop = loop->inner; + + while (loop != loops->tree_root) + { + move_single_loop_invariants (loop, df); + + if (loop->next) + { + loop = loop->next; + while (loop->inner) + loop = loop->inner; + } + else + loop = loop->outer; + } + + for (i = 1; i < loops->num; i++) + if (loops->parray[i]) + free_loop_data (loops->parray[i]); + + df_finish (df); +} diff --git a/gcc/passes.c b/gcc/passes.c index a4fb35a2611..678dd7dd744 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1290,7 +1290,8 @@ rest_of_handle_loop2 (void) struct loops *loops; basic_block bb; - if (!flag_unswitch_loops + if (!flag_move_loop_invariants + && !flag_unswitch_loops && !flag_peel_loops && !flag_unroll_loops && !flag_branch_on_count_reg) @@ -1309,6 +1310,9 @@ rest_of_handle_loop2 (void) if (loops) { /* The optimizations: */ + if (flag_move_loop_invariants) + move_loop_invariants (loops); + if (flag_unswitch_loops) unswitch_loops (loops); @@ -1598,7 +1602,8 @@ rest_of_compilation (void) if (flag_tracer) rest_of_handle_tracer (); - if (optimize > 0) + if (optimize > 0 + && flag_loop_optimize2) rest_of_handle_loop2 (); if (flag_web) diff --git a/gcc/toplev.c b/gcc/toplev.c index b2990bcae77..1ce344cac9c 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1673,6 +1673,19 @@ process_options (void) if (flag_unroll_loops || flag_peel_loops) flag_rerun_cse_after_loop = 1; + /* If explicitly asked to run new loop optimizer, switch off the old + one. */ + if (flag_loop_optimize2) + flag_loop_optimize = 0; + + /* Enable new loop optimizer pass if any of its optimizations is called. */ + if (flag_move_loop_invariants + || flag_unswitch_loops + || flag_peel_loops + || flag_unroll_loops + || flag_branch_on_count_reg) + flag_loop_optimize2 = 1; + if (flag_non_call_exceptions) flag_asynchronous_unwind_tables = 1; if (flag_asynchronous_unwind_tables) -- 2.30.2