/* Instruction scheduling pass.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
- 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
and currently maintained by, Jim Wilson (wilson@cygnus.com)
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
+#include "hard-reg-set.h"
#include "rtl.h"
#include "tm_p.h"
-#include "hard-reg-set.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
enable a DCE pass. */
bool sched_no_dce;
+/* The current initiation interval used when modulo scheduling. */
+static int modulo_ii;
+
+/* The maximum number of stages we are prepared to handle. */
+static int modulo_max_stages;
+
+/* The number of insns that exist in each iteration of the loop. We use this
+ to detect when we've scheduled all insns from the first iteration. */
+static int modulo_n_insns;
+
+/* The current count of insns in the first iteration of the loop that have
+ already been scheduled. */
+static int modulo_insns_scheduled;
+
+/* The maximum uid of insns from the first iteration of the loop. */
+static int modulo_iter0_max_uid;
+
+/* The number of times we should attempt to backtrack when modulo scheduling.
+ Decreased each time we have to backtrack. */
+static int modulo_backtracks_left;
+
+/* The stage in which the last insn from the original loop was
+ scheduled. */
+static int modulo_last_stage;
+
/* sched-verbose controls the amount of debugging output the
scheduler prints. It is controlled by -fsched-verbose=N:
N>0 and no -DSR : the output is directed to stderr.
#define INTER_TICK(INSN) (HID (INSN)->inter_tick)
#define FEEDS_BACKTRACK_INSN(INSN) (HID (INSN)->feeds_backtrack_insn)
#define SHADOW_P(INSN) (HID (INSN)->shadow_p)
+#define MUST_RECOMPUTE_SPEC_P(INSN) (HID (INSN)->must_recompute_spec)
/* If INSN_TICK of an instruction is equal to INVALID_TICK,
then it should be recalculated from scratch. */
/* Create empty basic block after the specified block. */
basic_block (* sched_create_empty_bb) (basic_block);
+/* Return the number of cycles until INSN is expected to be ready.
+ Return zero if it already is. */
+static int
+insn_delay (rtx insn)
+{
+ return MAX (INSN_TICK (insn) - clock_var, 0);
+}
+
static int
may_trap_exp (const_rtx x, int is_store)
{
{
return haifa_classify_rtx (PATTERN (insn));
}
+\f
+/* After the scheduler initialization function has been called, this function
+ can be called to enable modulo scheduling. II is the initiation interval
+ we should use, it affects the delays for delay_pairs that were recorded as
+ separated by a given number of stages.
+
+ MAX_STAGES provides us with a limit
+ after which we give up scheduling; the caller must have unrolled at least
+ as many copies of the loop body and recorded delay_pairs for them.
+
+ INSNS is the number of real (non-debug) insns in one iteration of
+ the loop. MAX_UID can be used to test whether an insn belongs to
+ the first iteration of the loop; all of them have a uid lower than
+ MAX_UID. */
+void
+set_modulo_params (int ii, int max_stages, int insns, int max_uid)
+{
+ modulo_ii = ii;
+ modulo_max_stages = max_stages;
+ modulo_n_insns = insns;
+ modulo_iter0_max_uid = max_uid;
+ modulo_backtracks_left = PARAM_VALUE (PARAM_MAX_MODULO_BACKTRACK_ATTEMPTS);
+}
/* A structure to record a pair of insns where the first one is a real
insn that has delay slots, and the second is its delayed shadow.
struct delay_pair *next_same_i1;
rtx i1, i2;
int cycles;
+ /* When doing modulo scheduling, we a delay_pair can also be used to
+ show that I1 and I2 are the same insn in a different stage. If that
+ is the case, STAGES will be nonzero. */
+ int stages;
};
/* Two hash tables to record delay_pairs, one indexed by I1 and the other
static htab_t delay_htab;
static htab_t delay_htab_i2;
+/* Called through htab_traverse. Walk the hashtable using I2 as
+ index, and delete all elements involving an UID higher than
+ that pointed to by *DATA. */
+static int
+htab_i2_traverse (void **slot, void *data)
+{
+ int maxuid = *(int *)data;
+ struct delay_pair *p = *(struct delay_pair **)slot;
+ if (INSN_UID (p->i2) >= maxuid || INSN_UID (p->i1) >= maxuid)
+ {
+ htab_clear_slot (delay_htab_i2, slot);
+ }
+ return 1;
+}
+
+/* Called through htab_traverse. Walk the hashtable using I2 as
+ index, and delete all elements involving an UID higher than
+ that pointed to by *DATA. */
+static int
+htab_i1_traverse (void **slot, void *data)
+{
+ int maxuid = *(int *)data;
+ struct delay_pair **pslot = (struct delay_pair **)slot;
+ struct delay_pair *p, *first, **pprev;
+
+ if (INSN_UID ((*pslot)->i1) >= maxuid)
+ {
+ htab_clear_slot (delay_htab, slot);
+ return 1;
+ }
+ pprev = &first;
+ for (p = *pslot; p; p = p->next_same_i1)
+ {
+ if (INSN_UID (p->i2) < maxuid)
+ {
+ *pprev = p;
+ pprev = &p->next_same_i1;
+ }
+ }
+ *pprev = NULL;
+ if (first == NULL)
+ htab_clear_slot (delay_htab, slot);
+ else
+ *pslot = first;
+ return 1;
+}
+
+/* Discard all delay pairs which involve an insn with an UID higher
+ than MAX_UID. */
+void
+discard_delay_pairs_above (int max_uid)
+{
+ htab_traverse (delay_htab, htab_i1_traverse, &max_uid);
+ htab_traverse (delay_htab_i2, htab_i2_traverse, &max_uid);
+}
+
/* Returns a hash value for X (which really is a delay_pair), based on
hashing just I1. */
static hashval_t
return ((const struct delay_pair *) x)->i2 == y;
}
-/* This function can be called by a port just before it starts the
- final scheduling pass. It records the fact that an instruction
- with delay slots has been split into two insns, I1 and I2. The
- first one will be scheduled normally and initiates the operation.
- The second one is a shadow which must follow a specific number of
- CYCLES after I1; its only purpose is to show the side effect that
- occurs at that cycle in the RTL. If a JUMP_INSN or a CALL_INSN has
- been split, I1 should be a normal INSN, while I2 retains the
- original insn type. */
+/* This function can be called by a port just before it starts the final
+ scheduling pass. It records the fact that an instruction with delay
+ slots has been split into two insns, I1 and I2. The first one will be
+ scheduled normally and initiates the operation. The second one is a
+ shadow which must follow a specific number of cycles after I1; its only
+ purpose is to show the side effect that occurs at that cycle in the RTL.
+ If a JUMP_INSN or a CALL_INSN has been split, I1 should be a normal INSN,
+ while I2 retains the original insn type.
+
+ There are two ways in which the number of cycles can be specified,
+ involving the CYCLES and STAGES arguments to this function. If STAGES
+ is zero, we just use the value of CYCLES. Otherwise, STAGES is a factor
+ which is multiplied by MODULO_II to give the number of cycles. This is
+ only useful if the caller also calls set_modulo_params to enable modulo
+ scheduling. */
void
-record_delay_slot_pair (rtx i1, rtx i2, int cycles)
+record_delay_slot_pair (rtx i1, rtx i2, int cycles, int stages)
{
struct delay_pair *p = XNEW (struct delay_pair);
struct delay_pair **slot;
p->i1 = i1;
p->i2 = i2;
p->cycles = cycles;
+ p->stages = stages;
if (!delay_htab)
{
*slot = p;
}
+/* Examine the delay pair hashtable to see if INSN is a shadow for another,
+ and return the other insn if so. Return NULL otherwise. */
+rtx
+real_insn_for_shadow (rtx insn)
+{
+ struct delay_pair *pair;
+
+ if (delay_htab == NULL)
+ return NULL_RTX;
+
+ pair
+ = (struct delay_pair *)htab_find_with_hash (delay_htab_i2, insn,
+ htab_hash_pointer (insn));
+ if (!pair || pair->stages > 0)
+ return NULL_RTX;
+ return pair->i1;
+}
+
/* For a pair P of insns, return the fixed distance in cycles from the first
insn after which the second must be scheduled. */
static int
pair_delay (struct delay_pair *p)
{
- return p->cycles;
+ if (p->stages == 0)
+ return p->cycles;
+ else
+ return p->stages * modulo_ii;
}
/* Given an insn INSN, add a dependence on its delayed shadow if it
if (!pair)
return;
add_dependence (insn, pair->i1, REG_DEP_ANTI);
+ if (pair->stages)
+ return;
FOR_EACH_DEP (pair->i2, SD_LIST_BACK, sd_it, dep)
{
struct delay_pair *other_pair
= (struct delay_pair *)htab_find_with_hash (delay_htab_i2, pro,
htab_hash_pointer (pro));
- if (!other_pair)
+ if (!other_pair || other_pair->stages)
continue;
if (pair_delay (other_pair) >= pair_delay (pair))
{
static void extend_h_i_d (void);
static void init_h_i_d (rtx);
+static int haifa_speculate_insn (rtx, ds_t, rtx *);
static void generate_recovery_code (rtx);
static void process_insn_forw_deps_be_in_spec (rtx, rtx, ds_t);
static void begin_speculative_block (rtx);
static void init_before_recovery (basic_block *);
static void create_check_block_twin (rtx, bool);
static void fix_recovery_deps (basic_block);
-static void haifa_change_pattern (rtx, rtx);
+static bool haifa_change_pattern (rtx, rtx);
static void dump_new_block_header (int, basic_block, rtx, rtx);
static void restore_bb_notes (basic_block);
static void fix_jump_move (rtx);
/* Do register pressure sensitive insn scheduling if the flag is set
up. */
-bool sched_pressure_p;
+enum sched_pressure_algorithm sched_pressure;
/* Map regno -> its pressure class. The map defined only when
- SCHED_PRESSURE_P is true. */
+ SCHED_PRESSURE != SCHED_PRESSURE_NONE. */
enum reg_class *sched_regno_pressure_class;
/* The current register pressure. Only elements corresponding pressure
bitmap_clear (region_ref_regs);
}
-/* Update current register pressure related info after birth (if
- BIRTH_P) or death of register REGNO. */
-static void
-mark_regno_birth_or_death (int regno, bool birth_p)
+/* PRESSURE[CL] describes the pressure on register class CL. Update it
+ for the birth (if BIRTH_P) or death (if !BIRTH_P) of register REGNO.
+ LIVE tracks the set of live registers; if it is null, assume that
+ every birth or death is genuine. */
+static inline void
+mark_regno_birth_or_death (bitmap live, int *pressure, int regno, bool birth_p)
{
enum reg_class pressure_class;
{
if (birth_p)
{
- bitmap_set_bit (curr_reg_live, regno);
- curr_reg_pressure[pressure_class]
- += (ira_reg_class_max_nregs
- [pressure_class][PSEUDO_REGNO_MODE (regno)]);
+ if (!live || bitmap_set_bit (live, regno))
+ pressure[pressure_class]
+ += (ira_reg_class_max_nregs
+ [pressure_class][PSEUDO_REGNO_MODE (regno)]);
}
else
{
- bitmap_clear_bit (curr_reg_live, regno);
- curr_reg_pressure[pressure_class]
- -= (ira_reg_class_max_nregs
- [pressure_class][PSEUDO_REGNO_MODE (regno)]);
+ if (!live || bitmap_clear_bit (live, regno))
+ pressure[pressure_class]
+ -= (ira_reg_class_max_nregs
+ [pressure_class][PSEUDO_REGNO_MODE (regno)]);
}
}
}
{
if (birth_p)
{
- bitmap_set_bit (curr_reg_live, regno);
- curr_reg_pressure[pressure_class]++;
+ if (!live || bitmap_set_bit (live, regno))
+ pressure[pressure_class]++;
}
else
{
- bitmap_clear_bit (curr_reg_live, regno);
- curr_reg_pressure[pressure_class]--;
+ if (!live || bitmap_clear_bit (live, regno))
+ pressure[pressure_class]--;
}
}
}
curr_reg_pressure[ira_pressure_classes[i]] = 0;
bitmap_clear (curr_reg_live);
EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi)
- if (current_nr_blocks == 1 || bitmap_bit_p (region_ref_regs, j))
- mark_regno_birth_or_death (j, true);
+ if (sched_pressure == SCHED_PRESSURE_MODEL
+ || current_nr_blocks == 1
+ || bitmap_bit_p (region_ref_regs, j))
+ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, j, true);
}
/* Mark registers in X as mentioned in the current region. */
if (regno == INVALID_REGNUM)
break;
if (! bitmap_bit_p (df_get_live_in (bb), regno))
- mark_regno_birth_or_death (regno, true);
+ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
+ regno, true);
}
#endif
}
}
fprintf (sched_dump, "\n");
}
+\f
+/* Determine if INSN has a condition that is clobbered if a register
+ in SET_REGS is modified. */
+static bool
+cond_clobbered_p (rtx insn, HARD_REG_SET set_regs)
+{
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == COND_EXEC);
+ if (TEST_HARD_REG_BIT (set_regs, REGNO (XEXP (COND_EXEC_TEST (pat), 0))))
+ {
+ sd_iterator_def sd_it;
+ dep_t dep;
+ haifa_change_pattern (insn, ORIG_PAT (insn));
+ FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep)
+ DEP_STATUS (dep) &= ~DEP_CANCELLED;
+ TODO_SPEC (insn) = HARD_DEP;
+ if (sched_verbose >= 2)
+ fprintf (sched_dump,
+ ";;\t\tdequeue insn %s because of clobbered condition\n",
+ (*current_sched_info->print_insn) (insn, 0));
+ return true;
+ }
+
+ return false;
+}
+
+/* Look at the remaining dependencies for insn NEXT, and compute and return
+ the TODO_SPEC value we should use for it. This is called after one of
+ NEXT's dependencies has been resolved. */
+
+static ds_t
+recompute_todo_spec (rtx next)
+{
+ ds_t new_ds;
+ sd_iterator_def sd_it;
+ dep_t dep, control_dep = NULL;
+ int n_spec = 0;
+ int n_control = 0;
+ bool first_p = true;
+
+ if (sd_lists_empty_p (next, SD_LIST_BACK))
+ /* NEXT has all its dependencies resolved. */
+ return 0;
+
+ if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK))
+ return HARD_DEP;
+
+ /* Now we've got NEXT with speculative deps only.
+ 1. Look at the deps to see what we have to do.
+ 2. Check if we can do 'todo'. */
+ new_ds = 0;
+
+ FOR_EACH_DEP (next, SD_LIST_BACK, sd_it, dep)
+ {
+ ds_t ds = DEP_STATUS (dep) & SPECULATIVE;
+
+ if (DEBUG_INSN_P (DEP_PRO (dep)) && !DEBUG_INSN_P (next))
+ continue;
+
+ if (ds)
+ {
+ n_spec++;
+ if (first_p)
+ {
+ first_p = false;
+
+ new_ds = ds;
+ }
+ else
+ new_ds = ds_merge (new_ds, ds);
+ }
+ if (DEP_TYPE (dep) == REG_DEP_CONTROL)
+ {
+ n_control++;
+ control_dep = dep;
+ DEP_STATUS (dep) &= ~DEP_CANCELLED;
+ }
+ }
+
+ if (n_control == 1 && n_spec == 0)
+ {
+ rtx pro, other, new_pat;
+ rtx cond = NULL_RTX;
+ bool success;
+ rtx prev = NULL_RTX;
+ int i;
+ unsigned regno;
+
+ if ((current_sched_info->flags & DO_PREDICATION) == 0
+ || (ORIG_PAT (next) != NULL_RTX
+ && PREDICATED_PAT (next) == NULL_RTX))
+ return HARD_DEP;
+
+ pro = DEP_PRO (control_dep);
+ other = real_insn_for_shadow (pro);
+ if (other != NULL_RTX)
+ pro = other;
+
+ cond = sched_get_reverse_condition_uncached (pro);
+ regno = REGNO (XEXP (cond, 0));
+
+ /* Find the last scheduled insn that modifies the condition register.
+ We can stop looking once we find the insn we depend on through the
+ REG_DEP_CONTROL; if the condition register isn't modified after it,
+ we know that it still has the right value. */
+ if (QUEUE_INDEX (pro) == QUEUE_SCHEDULED)
+ FOR_EACH_VEC_ELT_REVERSE (rtx, scheduled_insns, i, prev)
+ {
+ HARD_REG_SET t;
+ find_all_hard_reg_sets (prev, &t);
+ if (TEST_HARD_REG_BIT (t, regno))
+ return HARD_DEP;
+ if (prev == pro)
+ break;
+ }
+ if (ORIG_PAT (next) == NULL_RTX)
+ {
+ ORIG_PAT (next) = PATTERN (next);
+
+ new_pat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (next));
+ success = haifa_change_pattern (next, new_pat);
+ if (!success)
+ return HARD_DEP;
+ PREDICATED_PAT (next) = new_pat;
+ }
+ else if (PATTERN (next) != PREDICATED_PAT (next))
+ {
+ bool success = haifa_change_pattern (next,
+ PREDICATED_PAT (next));
+ gcc_assert (success);
+ }
+ DEP_STATUS (control_dep) |= DEP_CANCELLED;
+ return DEP_CONTROL;
+ }
+
+ if (PREDICATED_PAT (next) != NULL_RTX)
+ {
+ int tick = INSN_TICK (next);
+ bool success = haifa_change_pattern (next,
+ ORIG_PAT (next));
+ INSN_TICK (next) = tick;
+ gcc_assert (success);
+ }
+
+ /* We can't handle the case where there are both speculative and control
+ dependencies, so we return HARD_DEP in such a case. Also fail if
+ we have speculative dependencies with not enough points, or more than
+ one control dependency. */
+ if ((n_spec > 0 && n_control > 0)
+ || (n_spec > 0
+ /* Too few points? */
+ && ds_weak (new_ds) < spec_info->data_weakness_cutoff)
+ || (n_control > 1))
+ return HARD_DEP;
+
+ return new_ds;
+}
+\f
/* Pointer to the last instruction scheduled. */
static rtx last_scheduled_insn;
return true;
}
-/* Compute the number of nondebug forward deps of an insn. */
+/* Compute the number of nondebug deps in list LIST for INSN. */
static int
-dep_list_size (rtx insn)
+dep_list_size (rtx insn, sd_list_types_def list)
{
sd_iterator_def sd_it;
dep_t dep;
int dbgcount = 0, nodbgcount = 0;
if (!MAY_HAVE_DEBUG_INSNS)
- return sd_lists_size (insn, SD_LIST_FORW);
+ return sd_lists_size (insn, list);
- FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
+ FOR_EACH_DEP (insn, list, sd_it, dep)
{
if (DEBUG_INSN_P (DEP_CON (dep)))
dbgcount++;
nodbgcount++;
}
- gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, SD_LIST_FORW));
+ gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, list));
return nodbgcount;
}
{
int this_priority = -1;
- if (dep_list_size (insn) == 0)
+ if (dep_list_size (insn, SD_LIST_FORW) == 0)
/* ??? We should set INSN_PRIORITY to insn_cost when and insn has
some forward deps but all of them are ignored by
contributes_to_priority hook. At the moment we set priority of
qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \
while (0)
+/* For each pressure class CL, set DEATH[CL] to the number of registers
+ in that class that die in INSN. */
+
+static void
+calculate_reg_deaths (rtx insn, int *death)
+{
+ int i;
+ struct reg_use_data *use;
+
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ death[ira_pressure_classes[i]] = 0;
+ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
+ if (dying_use_p (use))
+ mark_regno_birth_or_death (0, death, use->regno, true);
+}
+
/* Setup info about the current register pressure impact of scheduling
INSN at the current scheduling point. */
static void
enum reg_class cl;
struct reg_pressure_data *pressure_info;
int *max_reg_pressure;
- struct reg_use_data *use;
static int death[N_REG_CLASSES];
gcc_checking_assert (!DEBUG_INSN_P (insn));
excess_cost_change = 0;
- for (i = 0; i < ira_pressure_classes_num; i++)
- death[ira_pressure_classes[i]] = 0;
- for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
- if (dying_use_p (use))
- {
- cl = sched_regno_pressure_class[use->regno];
- if (use->regno < FIRST_PSEUDO_REGISTER)
- death[cl]++;
- else
- death[cl]
- += ira_reg_class_max_nregs[cl][PSEUDO_REGNO_MODE (use->regno)];
- }
+ calculate_reg_deaths (insn, death);
pressure_info = INSN_REG_PRESSURE (insn);
max_reg_pressure = INSN_MAX_REG_PRESSURE (insn);
gcc_assert (pressure_info != NULL && max_reg_pressure != NULL);
}
INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insn) = excess_cost_change;
}
+\f
+/* This is the first page of code related to SCHED_PRESSURE_MODEL.
+ It tries to make the scheduler take register pressure into account
+ without introducing too many unnecessary stalls. It hooks into the
+ main scheduling algorithm at several points:
+
+ - Before scheduling starts, model_start_schedule constructs a
+ "model schedule" for the current block. This model schedule is
+ chosen solely to keep register pressure down. It does not take the
+ target's pipeline or the original instruction order into account,
+ except as a tie-breaker. It also doesn't work to a particular
+ pressure limit.
+
+ This model schedule gives us an idea of what pressure can be
+ achieved for the block and gives us an example of a schedule that
+ keeps to that pressure. It also makes the final schedule less
+ dependent on the original instruction order. This is important
+ because the original order can either be "wide" (many values live
+ at once, such as in user-scheduled code) or "narrow" (few values
+ live at once, such as after loop unrolling, where several
+ iterations are executed sequentially).
+
+ We do not apply this model schedule to the rtx stream. We simply
+ record it in model_schedule. We also compute the maximum pressure,
+ MP, that was seen during this schedule.
+
+ - Instructions are added to the ready queue even if they require
+ a stall. The length of the stall is instead computed as:
+
+ MAX (INSN_TICK (INSN) - clock_var, 0)
+
+ (= insn_delay). This allows rank_for_schedule to choose between
+ introducing a deliberate stall or increasing pressure.
+
+ - Before sorting the ready queue, model_set_excess_costs assigns
+ a pressure-based cost to each ready instruction in the queue.
+ This is the instruction's INSN_REG_PRESSURE_EXCESS_COST_CHANGE
+ (ECC for short) and is effectively measured in cycles.
+
+ - rank_for_schedule ranks instructions based on:
+
+ ECC (insn) + insn_delay (insn)
+
+ then as:
+
+ insn_delay (insn)
+
+ So, for example, an instruction X1 with an ECC of 1 that can issue
+ now will win over an instruction X0 with an ECC of zero that would
+ introduce a stall of one cycle. However, an instruction X2 with an
+ ECC of 2 that can issue now will lose to both X0 and X1.
+
+ - When an instruction is scheduled, model_recompute updates the model
+ schedule with the new pressures (some of which might now exceed the
+ original maximum pressure MP). model_update_limit_points then searches
+ for the new point of maximum pressure, if not already known. */
+
+/* Used to separate high-verbosity debug information for SCHED_PRESSURE_MODEL
+ from surrounding debug information. */
+#define MODEL_BAR \
+ ";;\t\t+------------------------------------------------------\n"
+
+/* Information about the pressure on a particular register class at a
+ particular point of the model schedule. */
+struct model_pressure_data {
+ /* The pressure at this point of the model schedule, or -1 if the
+ point is associated with an instruction that has already been
+ scheduled. */
+ int ref_pressure;
+
+ /* The maximum pressure during or after this point of the model schedule. */
+ int max_pressure;
+};
-/* Returns a positive value if x is preferred; returns a negative value if
- y is preferred. Should never return 0, since that will make the sort
- unstable. */
+/* Per-instruction information that is used while building the model
+ schedule. Here, "schedule" refers to the model schedule rather
+ than the main schedule. */
+struct model_insn_info {
+ /* The instruction itself. */
+ rtx insn;
-static int
-rank_for_schedule (const void *x, const void *y)
-{
- rtx tmp = *(const rtx *) y;
- rtx tmp2 = *(const rtx *) x;
- int tmp_class, tmp2_class;
- int val, priority_val, info_val;
+ /* If this instruction is in model_worklist, these fields link to the
+ previous (higher-priority) and next (lower-priority) instructions
+ in the list. */
+ struct model_insn_info *prev;
+ struct model_insn_info *next;
+
+ /* While constructing the schedule, QUEUE_INDEX describes whether an
+ instruction has already been added to the schedule (QUEUE_SCHEDULED),
+ is in model_worklist (QUEUE_READY), or neither (QUEUE_NOWHERE).
+ old_queue records the value that QUEUE_INDEX had before scheduling
+ started, so that we can restore it once the schedule is complete. */
+ int old_queue;
+
+ /* The relative importance of an unscheduled instruction. Higher
+ values indicate greater importance. */
+ unsigned int model_priority;
+
+ /* The length of the longest path of satisfied true dependencies
+ that leads to this instruction. */
+ unsigned int depth;
+
+ /* The length of the longest path of dependencies of any kind
+ that leads from this instruction. */
+ unsigned int alap;
+
+ /* The number of predecessor nodes that must still be scheduled. */
+ int unscheduled_preds;
+};
- if (MAY_HAVE_DEBUG_INSNS)
- {
- /* Schedule debug insns as early as possible. */
- if (DEBUG_INSN_P (tmp) && !DEBUG_INSN_P (tmp2))
- return -1;
- else if (DEBUG_INSN_P (tmp2))
- return 1;
- }
+/* Information about the pressure limit for a particular register class.
+ This structure is used when applying a model schedule to the main
+ schedule. */
+struct model_pressure_limit {
+ /* The maximum register pressure seen in the original model schedule. */
+ int orig_pressure;
- /* The insn in a schedule group should be issued the first. */
- if (flag_sched_group_heuristic &&
- SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
- return SCHED_GROUP_P (tmp2) ? 1 : -1;
+ /* The maximum register pressure seen in the current model schedule
+ (which excludes instructions that have already been scheduled). */
+ int pressure;
- /* Make sure that priority of TMP and TMP2 are initialized. */
- gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
+ /* The point of the current model schedule at which PRESSURE is first
+ reached. It is set to -1 if the value needs to be recomputed. */
+ int point;
+};
- if (sched_pressure_p)
- {
- int diff;
+/* Describes a particular way of measuring register pressure. */
+struct model_pressure_group {
+ /* Index PCI describes the maximum pressure on ira_pressure_classes[PCI]. */
+ struct model_pressure_limit limits[N_REG_CLASSES];
- /* Prefer insn whose scheduling results in the smallest register
- pressure excess. */
- if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp)
- + (INSN_TICK (tmp) > clock_var
- ? INSN_TICK (tmp) - clock_var : 0)
- - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2)
- - (INSN_TICK (tmp2) > clock_var
- ? INSN_TICK (tmp2) - clock_var : 0))) != 0)
- return diff;
- }
+ /* Index (POINT * ira_num_pressure_classes + PCI) describes the pressure
+ on register class ira_pressure_classes[PCI] at point POINT of the
+ current model schedule. A POINT of model_num_insns describes the
+ pressure at the end of the schedule. */
+ struct model_pressure_data *model;
+};
+/* Index POINT gives the instruction at point POINT of the model schedule.
+ This array doesn't change during main scheduling. */
+static VEC (rtx, heap) *model_schedule;
- if (sched_pressure_p
- && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var))
- {
- if (INSN_TICK (tmp) <= clock_var)
- return -1;
- else if (INSN_TICK (tmp2) <= clock_var)
- return 1;
- else
- return INSN_TICK (tmp) - INSN_TICK (tmp2);
- }
+/* The list of instructions in the model worklist, sorted in order of
+ decreasing priority. */
+static struct model_insn_info *model_worklist;
- /* If we are doing backtracking in this schedule, prefer insns that
- have forward dependencies with negative cost against an insn that
- was already scheduled. */
- if (current_sched_info->flags & DO_BACKTRACKING)
- {
- priority_val = FEEDS_BACKTRACK_INSN (tmp2) - FEEDS_BACKTRACK_INSN (tmp);
- if (priority_val)
- return priority_val;
- }
+/* Index I describes the instruction with INSN_LUID I. */
+static struct model_insn_info *model_insns;
- /* Prefer insn with higher priority. */
- priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
+/* The number of instructions in the model schedule. */
+static int model_num_insns;
- if (flag_sched_critical_path_heuristic && priority_val)
- return priority_val;
+/* The index of the first instruction in model_schedule that hasn't yet been
+ added to the main schedule, or model_num_insns if all of them have. */
+static int model_curr_point;
- /* Prefer speculative insn with greater dependencies weakness. */
- if (flag_sched_spec_insn_heuristic && spec_info)
- {
- ds_t ds1, ds2;
- dw_t dw1, dw2;
- int dw;
+/* Describes the pressure before each instruction in the model schedule. */
+static struct model_pressure_group model_before_pressure;
- ds1 = TODO_SPEC (tmp) & SPECULATIVE;
- if (ds1)
- dw1 = ds_weak (ds1);
- else
- dw1 = NO_DEP_WEAK;
+/* The first unused model_priority value (as used in model_insn_info). */
+static unsigned int model_next_priority;
- ds2 = TODO_SPEC (tmp2) & SPECULATIVE;
- if (ds2)
- dw2 = ds_weak (ds2);
- else
- dw2 = NO_DEP_WEAK;
- dw = dw2 - dw1;
- if (dw > (NO_DEP_WEAK / 8) || dw < -(NO_DEP_WEAK / 8))
- return dw;
- }
+/* The model_pressure_data for ira_pressure_classes[PCI] in GROUP
+ at point POINT of the model schedule. */
+#define MODEL_PRESSURE_DATA(GROUP, POINT, PCI) \
+ (&(GROUP)->model[(POINT) * ira_pressure_classes_num + (PCI)])
- info_val = (*current_sched_info->rank) (tmp, tmp2);
- if(flag_sched_rank_heuristic && info_val)
- return info_val;
+/* The maximum pressure on ira_pressure_classes[PCI] in GROUP at or
+ after point POINT of the model schedule. */
+#define MODEL_MAX_PRESSURE(GROUP, POINT, PCI) \
+ (MODEL_PRESSURE_DATA (GROUP, POINT, PCI)->max_pressure)
- /* Compare insns based on their relation to the last scheduled
- non-debug insn. */
- if (flag_sched_last_insn_heuristic && last_nondebug_scheduled_insn)
- {
- dep_t dep1;
- dep_t dep2;
- rtx last = last_nondebug_scheduled_insn;
+/* The pressure on ira_pressure_classes[PCI] in GROUP at point POINT
+ of the model schedule. */
+#define MODEL_REF_PRESSURE(GROUP, POINT, PCI) \
+ (MODEL_PRESSURE_DATA (GROUP, POINT, PCI)->ref_pressure)
- /* Classify the instructions into three classes:
- 1) Data dependent on last schedule insn.
- 2) Anti/Output dependent on last scheduled insn.
- 3) Independent of last scheduled insn, or has latency of one.
- Choose the insn from the highest numbered class if different. */
- dep1 = sd_find_dep_between (last, tmp, true);
+/* Information about INSN that is used when creating the model schedule. */
+#define MODEL_INSN_INFO(INSN) \
+ (&model_insns[INSN_LUID (INSN)])
- if (dep1 == NULL || dep_cost (dep1) == 1)
- tmp_class = 3;
- else if (/* Data dependence. */
- DEP_TYPE (dep1) == REG_DEP_TRUE)
- tmp_class = 1;
- else
- tmp_class = 2;
+/* The instruction at point POINT of the model schedule. */
+#define MODEL_INSN(POINT) \
+ (VEC_index (rtx, model_schedule, POINT))
- dep2 = sd_find_dep_between (last, tmp2, true);
- if (dep2 == NULL || dep_cost (dep2) == 1)
- tmp2_class = 3;
- else if (/* Data dependence. */
- DEP_TYPE (dep2) == REG_DEP_TRUE)
- tmp2_class = 1;
- else
- tmp2_class = 2;
+/* Return INSN's index in the model schedule, or model_num_insns if it
+ doesn't belong to that schedule. */
- if ((val = tmp2_class - tmp_class))
- return val;
- }
+static int
+model_index (rtx insn)
+{
+ if (INSN_MODEL_INDEX (insn) == 0)
+ return model_num_insns;
+ return INSN_MODEL_INDEX (insn) - 1;
+}
- /* Prefer the insn which has more later insns that depend on it.
- This gives the scheduler more freedom when scheduling later
- instructions at the expense of added register pressure. */
+/* Make sure that GROUP->limits is up-to-date for the current point
+ of the model schedule. */
- val = (dep_list_size (tmp2) - dep_list_size (tmp));
+static void
+model_update_limit_points_in_group (struct model_pressure_group *group)
+{
+ int pci, max_pressure, point;
- if (flag_sched_dep_count_heuristic && val != 0)
- return val;
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ /* We may have passed the final point at which the pressure in
+ group->limits[pci].pressure was reached. Update the limit if so. */
+ max_pressure = MODEL_MAX_PRESSURE (group, model_curr_point, pci);
+ group->limits[pci].pressure = max_pressure;
- /* If insns are equally good, sort by INSN_LUID (original insn order),
- so that we make the sort stable. This minimizes instruction movement,
- thus minimizing sched's effect on debugging and cross-jumping. */
- return INSN_LUID (tmp) - INSN_LUID (tmp2);
+ /* Find the point at which MAX_PRESSURE is first reached. We need
+ to search in three cases:
+
+ - We've already moved past the previous pressure point.
+ In this case we search forward from model_curr_point.
+
+ - We scheduled the previous point of maximum pressure ahead of
+ its position in the model schedule, but doing so didn't bring
+ the pressure point earlier. In this case we search forward
+ from that previous pressure point.
+
+ - Scheduling an instruction early caused the maximum pressure
+ to decrease. In this case we will have set the pressure
+ point to -1, and we search forward from model_curr_point. */
+ point = MAX (group->limits[pci].point, model_curr_point);
+ while (point < model_num_insns
+ && MODEL_REF_PRESSURE (group, point, pci) < max_pressure)
+ point++;
+ group->limits[pci].point = point;
+
+ gcc_assert (MODEL_REF_PRESSURE (group, point, pci) == max_pressure);
+ gcc_assert (MODEL_MAX_PRESSURE (group, point, pci) == max_pressure);
+ }
}
-/* Resort the array A in which only element at index N may be out of order. */
+/* Make sure that all register-pressure limits are up-to-date for the
+ current position in the model schedule. */
-HAIFA_INLINE static void
-swap_sort (rtx *a, int n)
+static void
+model_update_limit_points (void)
{
- rtx insn = a[n - 1];
- int i = n - 2;
+ model_update_limit_points_in_group (&model_before_pressure);
+}
- while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
+/* Return the model_index of the last unscheduled use in chain USE
+ outside of USE's instruction. Return -1 if there are no other uses,
+ or model_num_insns if the register is live at the end of the block. */
+
+static int
+model_last_use_except (struct reg_use_data *use)
+{
+ struct reg_use_data *next;
+ int last, index;
+
+ last = -1;
+ for (next = use->next_regno_use; next != use; next = next->next_regno_use)
+ if (NONDEBUG_INSN_P (next->insn)
+ && QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED)
+ {
+ index = model_index (next->insn);
+ if (index == model_num_insns)
+ return model_num_insns;
+ if (last < index)
+ last = index;
+ }
+ return last;
+}
+
+/* An instruction with model_index POINT has just been scheduled, and it
+ adds DELTA to the pressure on ira_pressure_classes[PCI] after POINT - 1.
+ Update MODEL_REF_PRESSURE (GROUP, POINT, PCI) and
+ MODEL_MAX_PRESSURE (GROUP, POINT, PCI) accordingly. */
+
+static void
+model_start_update_pressure (struct model_pressure_group *group,
+ int point, int pci, int delta)
+{
+ int next_max_pressure;
+
+ if (point == model_num_insns)
{
- a[i + 1] = a[i];
- i -= 1;
+ /* The instruction wasn't part of the model schedule; it was moved
+ from a different block. Update the pressure for the end of
+ the model schedule. */
+ MODEL_REF_PRESSURE (group, point, pci) += delta;
+ MODEL_MAX_PRESSURE (group, point, pci) += delta;
+ }
+ else
+ {
+ /* Record that this instruction has been scheduled. Nothing now
+ changes between POINT and POINT + 1, so get the maximum pressure
+ from the latter. If the maximum pressure decreases, the new
+ pressure point may be before POINT. */
+ MODEL_REF_PRESSURE (group, point, pci) = -1;
+ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, pci);
+ if (MODEL_MAX_PRESSURE (group, point, pci) > next_max_pressure)
+ {
+ MODEL_MAX_PRESSURE (group, point, pci) = next_max_pressure;
+ if (group->limits[pci].point == point)
+ group->limits[pci].point = -1;
+ }
}
- a[i + 1] = insn;
}
-/* Add INSN to the insn queue so that it can be executed at least
- N_CYCLES after the currently executing insn. Preserve insns
- chain for debugging purposes. REASON will be printed in debugging
- output. */
+/* Record that scheduling a later instruction has changed the pressure
+ at point POINT of the model schedule by DELTA (which might be 0).
+ Update GROUP accordingly. Return nonzero if these changes might
+ trigger changes to previous points as well. */
-HAIFA_INLINE static void
-queue_insn (rtx insn, int n_cycles, const char *reason)
+static int
+model_update_pressure (struct model_pressure_group *group,
+ int point, int pci, int delta)
{
- int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
- rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
- int new_tick;
+ int ref_pressure, max_pressure, next_max_pressure;
- gcc_assert (n_cycles <= max_insn_queue_index);
- gcc_assert (!DEBUG_INSN_P (insn));
+ /* If POINT hasn't yet been scheduled, update its pressure. */
+ ref_pressure = MODEL_REF_PRESSURE (group, point, pci);
+ if (ref_pressure >= 0 && delta != 0)
+ {
+ ref_pressure += delta;
+ MODEL_REF_PRESSURE (group, point, pci) = ref_pressure;
- insn_queue[next_q] = link;
- q_size += 1;
+ /* Check whether the maximum pressure in the overall schedule
+ has increased. (This means that the MODEL_MAX_PRESSURE of
+ every point <= POINT will need to increae too; see below.) */
+ if (group->limits[pci].pressure < ref_pressure)
+ group->limits[pci].pressure = ref_pressure;
- if (sched_verbose >= 2)
- {
- fprintf (sched_dump, ";;\t\tReady-->Q: insn %s: ",
- (*current_sched_info->print_insn) (insn, 0));
+ /* If we are at maximum pressure, and the maximum pressure
+ point was previously unknown or later than POINT,
+ bring it forward. */
+ if (group->limits[pci].pressure == ref_pressure
+ && !IN_RANGE (group->limits[pci].point, 0, point))
+ group->limits[pci].point = point;
- fprintf (sched_dump, "queued for %d cycles (%s).\n", n_cycles, reason);
+ /* If POINT used to be the point of maximum pressure, but isn't
+ any longer, we need to recalculate it using a forward walk. */
+ if (group->limits[pci].pressure > ref_pressure
+ && group->limits[pci].point == point)
+ group->limits[pci].point = -1;
}
- QUEUE_INDEX (insn) = next_q;
-
- if (current_sched_info->flags & DO_BACKTRACKING)
+ /* Update the maximum pressure at POINT. Changes here might also
+ affect the maximum pressure at POINT - 1. */
+ next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, pci);
+ max_pressure = MAX (ref_pressure, next_max_pressure);
+ if (MODEL_MAX_PRESSURE (group, point, pci) != max_pressure)
{
- new_tick = clock_var + n_cycles;
- if (INSN_TICK (insn) == INVALID_TICK || INSN_TICK (insn) < new_tick)
- INSN_TICK (insn) = new_tick;
+ MODEL_MAX_PRESSURE (group, point, pci) = max_pressure;
+ return 1;
+ }
+ return 0;
+}
- if (INSN_EXACT_TICK (insn) != INVALID_TICK
- && INSN_EXACT_TICK (insn) < clock_var + n_cycles)
+/* INSN has just been scheduled. Update the model schedule accordingly. */
+
+static void
+model_recompute (rtx insn)
+{
+ struct {
+ int last_use;
+ int regno;
+ } uses[FIRST_PSEUDO_REGISTER + MAX_RECOG_OPERANDS];
+ struct reg_use_data *use;
+ struct reg_pressure_data *reg_pressure;
+ int delta[N_REG_CLASSES];
+ int pci, point, mix, new_last, cl, ref_pressure, queue;
+ unsigned int i, num_uses, num_pending_births;
+ bool print_p;
+
+ /* The destinations of INSN were previously live from POINT onwards, but are
+ now live from model_curr_point onwards. Set up DELTA accordingly. */
+ point = model_index (insn);
+ reg_pressure = INSN_REG_PRESSURE (insn);
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ delta[cl] = reg_pressure[pci].set_increase;
+ }
+
+ /* Record which registers previously died at POINT, but which now die
+ before POINT. Adjust DELTA so that it represents the effect of
+ this change after POINT - 1. Set NUM_PENDING_BIRTHS to the number of
+ registers that will be born in the range [model_curr_point, POINT). */
+ num_uses = 0;
+ num_pending_births = 0;
+ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
+ {
+ new_last = model_last_use_except (use);
+ if (new_last < point)
{
- must_backtrack = true;
- if (sched_verbose >= 2)
- fprintf (sched_dump, ";;\t\tcausing a backtrack.\n");
+ gcc_assert (num_uses < ARRAY_SIZE (uses));
+ uses[num_uses].last_use = new_last;
+ uses[num_uses].regno = use->regno;
+ /* This register is no longer live after POINT - 1. */
+ mark_regno_birth_or_death (NULL, delta, use->regno, false);
+ num_uses++;
+ if (new_last >= 0)
+ num_pending_births++;
}
}
+
+ /* Update the MODEL_REF_PRESSURE and MODEL_MAX_PRESSURE for POINT.
+ Also set each group pressure limit for POINT. */
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ model_start_update_pressure (&model_before_pressure,
+ point, pci, delta[cl]);
+ }
+
+ /* Walk the model schedule backwards, starting immediately before POINT. */
+ print_p = false;
+ if (point != model_curr_point)
+ do
+ {
+ point--;
+ insn = MODEL_INSN (point);
+ queue = QUEUE_INDEX (insn);
+
+ if (queue != QUEUE_SCHEDULED)
+ {
+ /* DELTA describes the effect of the move on the register pressure
+ after POINT. Make it describe the effect on the pressure
+ before POINT. */
+ i = 0;
+ while (i < num_uses)
+ {
+ if (uses[i].last_use == point)
+ {
+ /* This register is now live again. */
+ mark_regno_birth_or_death (NULL, delta,
+ uses[i].regno, true);
+
+ /* Remove this use from the array. */
+ uses[i] = uses[num_uses - 1];
+ num_uses--;
+ num_pending_births--;
+ }
+ else
+ i++;
+ }
+
+ if (sched_verbose >= 5)
+ {
+ char buf[2048];
+
+ if (!print_p)
+ {
+ fprintf (sched_dump, MODEL_BAR);
+ fprintf (sched_dump, ";;\t\t| New pressure for model"
+ " schedule\n");
+ fprintf (sched_dump, MODEL_BAR);
+ print_p = true;
+ }
+
+ print_pattern (buf, PATTERN (insn), 0);
+ fprintf (sched_dump, ";;\t\t| %3d %4d %-30s ",
+ point, INSN_UID (insn), buf);
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ ref_pressure = MODEL_REF_PRESSURE (&model_before_pressure,
+ point, pci);
+ fprintf (sched_dump, " %s:[%d->%d]",
+ reg_class_names[ira_pressure_classes[pci]],
+ ref_pressure, ref_pressure + delta[cl]);
+ }
+ fprintf (sched_dump, "\n");
+ }
+ }
+
+ /* Adjust the pressure at POINT. Set MIX to nonzero if POINT - 1
+ might have changed as well. */
+ mix = num_pending_births;
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ mix |= delta[cl];
+ mix |= model_update_pressure (&model_before_pressure,
+ point, pci, delta[cl]);
+ }
+ }
+ while (mix && point > model_curr_point);
+
+ if (print_p)
+ fprintf (sched_dump, MODEL_BAR);
}
+\f
+/* model_spill_cost (CL, P, P') returns the cost of increasing the
+ pressure on CL from P to P'. We use this to calculate a "base ECC",
+ baseECC (CL, X), for each pressure class CL and each instruction X.
+ Supposing X changes the pressure on CL from P to P', and that the
+ maximum pressure on CL in the current model schedule is MP', then:
-/* Remove INSN from queue. */
-static void
-queue_remove (rtx insn)
+ * if X occurs before or at the next point of maximum pressure in
+ the model schedule and P' > MP', then:
+
+ baseECC (CL, X) = model_spill_cost (CL, MP, P')
+
+ The idea is that the pressure after scheduling a fixed set of
+ instructions -- in this case, the set up to and including the
+ next maximum pressure point -- is going to be the same regardless
+ of the order; we simply want to keep the intermediate pressure
+ under control. Thus X has a cost of zero unless scheduling it
+ now would exceed MP'.
+
+ If all increases in the set are by the same amount, no zero-cost
+ instruction will ever cause the pressure to exceed MP'. However,
+ if X is instead moved past an instruction X' with pressure in the
+ range (MP' - (P' - P), MP'), the pressure at X' will increase
+ beyond MP'. Since baseECC is very much a heuristic anyway,
+ it doesn't seem worth the overhead of tracking cases like these.
+
+ The cost of exceeding MP' is always based on the original maximum
+ pressure MP. This is so that going 2 registers over the original
+ limit has the same cost regardless of whether it comes from two
+ separate +1 deltas or from a single +2 delta.
+
+ * if X occurs after the next point of maximum pressure in the model
+ schedule and P' > P, then:
+
+ baseECC (CL, X) = model_spill_cost (CL, MP, MP' + (P' - P))
+
+ That is, if we move X forward across a point of maximum pressure,
+ and if X increases the pressure by P' - P, then we conservatively
+ assume that scheduling X next would increase the maximum pressure
+ by P' - P. Again, the cost of doing this is based on the original
+ maximum pressure MP, for the same reason as above.
+
+ * if P' < P, P > MP, and X occurs at or after the next point of
+ maximum pressure, then:
+
+ baseECC (CL, X) = -model_spill_cost (CL, MAX (MP, P'), P)
+
+ That is, if we have already exceeded the original maximum pressure MP,
+ and if X might reduce the maximum pressure again -- or at least push
+ it further back, and thus allow more scheduling freedom -- it is given
+ a negative cost to reflect the improvement.
+
+ * otherwise,
+
+ baseECC (CL, X) = 0
+
+ In this case, X is not expected to affect the maximum pressure MP',
+ so it has zero cost.
+
+ We then create a combined value baseECC (X) that is the sum of
+ baseECC (CL, X) for each pressure class CL.
+
+ baseECC (X) could itself be used as the ECC value described above.
+ However, this is often too conservative, in the sense that it
+ tends to make high-priority instructions that increase pressure
+ wait too long in cases where introducing a spill would be better.
+ For this reason the final ECC is a priority-adjusted form of
+ baseECC (X). Specifically, we calculate:
+
+ P (X) = INSN_PRIORITY (X) - insn_delay (X) - baseECC (X)
+ baseP = MAX { P (X) | baseECC (X) <= 0 }
+
+ Then:
+
+ ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0)
+
+ Thus an instruction's effect on pressure is ignored if it has a high
+ enough priority relative to the ones that don't increase pressure.
+ Negative values of baseECC (X) do not increase the priority of X
+ itself, but they do make it harder for other instructions to
+ increase the pressure further.
+
+ This pressure cost is deliberately timid. The intention has been
+ to choose a heuristic that rarely interferes with the normal list
+ scheduler in cases where that scheduler would produce good code.
+ We simply want to curb some of its worst excesses. */
+
+/* Return the cost of increasing the pressure in class CL from FROM to TO.
+
+ Here we use the very simplistic cost model that every register above
+ ira_available_class_regs[CL] has a spill cost of 1. We could use other
+ measures instead, such as one based on MEMORY_MOVE_COST. However:
+
+ (1) In order for an instruction to be scheduled, the higher cost
+ would need to be justified in a single saving of that many stalls.
+ This is overly pessimistic, because the benefit of spilling is
+ often to avoid a sequence of several short stalls rather than
+ a single long one.
+
+ (2) The cost is still arbitrary. Because we are not allocating
+ registers during scheduling, we have no way of knowing for
+ sure how many memory accesses will be required by each spill,
+ where the spills will be placed within the block, or even
+ which block(s) will contain the spills.
+
+ So a higher cost than 1 is often too conservative in practice,
+ forcing blocks to contain unnecessary stalls instead of spill code.
+ The simple cost below seems to be the best compromise. It reduces
+ the interference with the normal list scheduler, which helps make
+ it more suitable for a default-on option. */
+
+static int
+model_spill_cost (int cl, int from, int to)
{
- gcc_assert (QUEUE_INDEX (insn) >= 0);
- remove_free_INSN_LIST_elem (insn, &insn_queue[QUEUE_INDEX (insn)]);
- q_size--;
- QUEUE_INDEX (insn) = QUEUE_NOWHERE;
+ from = MAX (from, ira_available_class_regs[cl]);
+ return MAX (to, from) - from;
}
-/* Return a pointer to the bottom of the ready list, i.e. the insn
- with the lowest priority. */
+/* Return baseECC (ira_pressure_classes[PCI], POINT), given that
+ P = curr_reg_pressure[ira_pressure_classes[PCI]] and that
+ P' = P + DELTA. */
-rtx *
-ready_lastpos (struct ready_list *ready)
+static int
+model_excess_group_cost (struct model_pressure_group *group,
+ int point, int pci, int delta)
{
- gcc_assert (ready->n_ready >= 1);
- return ready->vec + ready->first - ready->n_ready + 1;
+ int pressure, cl;
+
+ cl = ira_pressure_classes[pci];
+ if (delta < 0 && point >= group->limits[pci].point)
+ {
+ pressure = MAX (group->limits[pci].orig_pressure,
+ curr_reg_pressure[cl] + delta);
+ return -model_spill_cost (cl, pressure, curr_reg_pressure[cl]);
+ }
+
+ if (delta > 0)
+ {
+ if (point > group->limits[pci].point)
+ pressure = group->limits[pci].pressure + delta;
+ else
+ pressure = curr_reg_pressure[cl] + delta;
+
+ if (pressure > group->limits[pci].pressure)
+ return model_spill_cost (cl, group->limits[pci].orig_pressure,
+ pressure);
+ }
+
+ return 0;
}
-/* Add an element INSN to the ready list so that it ends up with the
- lowest/highest priority depending on FIRST_P. */
+/* Return baseECC (MODEL_INSN (INSN)). Dump the costs to sched_dump
+ if PRINT_P. */
-HAIFA_INLINE static void
-ready_add (struct ready_list *ready, rtx insn, bool first_p)
+static int
+model_excess_cost (rtx insn, bool print_p)
{
- if (!first_p)
+ int point, pci, cl, cost, this_cost, delta;
+ struct reg_pressure_data *insn_reg_pressure;
+ int insn_death[N_REG_CLASSES];
+
+ calculate_reg_deaths (insn, insn_death);
+ point = model_index (insn);
+ insn_reg_pressure = INSN_REG_PRESSURE (insn);
+ cost = 0;
+
+ if (print_p)
+ fprintf (sched_dump, ";;\t\t| %3d %4d | %4d %+3d |", point,
+ INSN_UID (insn), INSN_PRIORITY (insn), insn_delay (insn));
+
+ /* Sum up the individual costs for each register class. */
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
{
- if (ready->first == ready->n_ready)
- {
- memmove (ready->vec + ready->veclen - ready->n_ready,
- ready_lastpos (ready),
- ready->n_ready * sizeof (rtx));
- ready->first = ready->veclen - 1;
- }
- ready->vec[ready->first - ready->n_ready] = insn;
+ cl = ira_pressure_classes[pci];
+ delta = insn_reg_pressure[pci].set_increase - insn_death[cl];
+ this_cost = model_excess_group_cost (&model_before_pressure,
+ point, pci, delta);
+ cost += this_cost;
+ if (print_p)
+ fprintf (sched_dump, " %s:[%d base cost %d]",
+ reg_class_names[cl], delta, this_cost);
}
- else
+
+ if (print_p)
+ fprintf (sched_dump, "\n");
+
+ return cost;
+}
+
+/* Dump the next points of maximum pressure for GROUP. */
+
+static void
+model_dump_pressure_points (struct model_pressure_group *group)
+{
+ int pci, cl;
+
+ fprintf (sched_dump, ";;\t\t| pressure points");
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
{
- if (ready->first == ready->veclen - 1)
- {
- if (ready->n_ready)
- /* ready_lastpos() fails when called with (ready->n_ready == 0). */
- memmove (ready->vec + ready->veclen - ready->n_ready - 1,
- ready_lastpos (ready),
- ready->n_ready * sizeof (rtx));
- ready->first = ready->veclen - 2;
- }
- ready->vec[++(ready->first)] = insn;
+ cl = ira_pressure_classes[pci];
+ fprintf (sched_dump, " %s:[%d->%d at ", reg_class_names[cl],
+ curr_reg_pressure[cl], group->limits[pci].pressure);
+ if (group->limits[pci].point < model_num_insns)
+ fprintf (sched_dump, "%d:%d]", group->limits[pci].point,
+ INSN_UID (MODEL_INSN (group->limits[pci].point)));
+ else
+ fprintf (sched_dump, "end]");
}
+ fprintf (sched_dump, "\n");
+}
- ready->n_ready++;
- if (DEBUG_INSN_P (insn))
- ready->n_debug++;
+/* Set INSN_REG_PRESSURE_EXCESS_COST_CHANGE for INSNS[0...COUNT-1]. */
- gcc_assert (QUEUE_INDEX (insn) != QUEUE_READY);
- QUEUE_INDEX (insn) = QUEUE_READY;
+static void
+model_set_excess_costs (rtx *insns, int count)
+{
+ int i, cost, priority_base, priority;
+ bool print_p;
+
+ /* Record the baseECC value for each instruction in the model schedule,
+ except that negative costs are converted to zero ones now rather thatn
+ later. Do not assign a cost to debug instructions, since they must
+ not change code-generation decisions. Experiments suggest we also
+ get better results by not assigning a cost to instructions from
+ a different block.
+
+ Set PRIORITY_BASE to baseP in the block comment above. This is the
+ maximum priority of the "cheap" instructions, which should always
+ include the next model instruction. */
+ priority_base = 0;
+ print_p = false;
+ for (i = 0; i < count; i++)
+ if (INSN_MODEL_INDEX (insns[i]))
+ {
+ if (sched_verbose >= 6 && !print_p)
+ {
+ fprintf (sched_dump, MODEL_BAR);
+ fprintf (sched_dump, ";;\t\t| Pressure costs for ready queue\n");
+ model_dump_pressure_points (&model_before_pressure);
+ fprintf (sched_dump, MODEL_BAR);
+ print_p = true;
+ }
+ cost = model_excess_cost (insns[i], print_p);
+ if (cost <= 0)
+ {
+ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]) - cost;
+ priority_base = MAX (priority_base, priority);
+ cost = 0;
+ }
+ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = cost;
+ }
+ if (print_p)
+ fprintf (sched_dump, MODEL_BAR);
- if (INSN_EXACT_TICK (insn) != INVALID_TICK
- && INSN_EXACT_TICK (insn) < clock_var)
+ /* Use MAX (baseECC, 0) and baseP to calculcate ECC for each
+ instruction. */
+ for (i = 0; i < count; i++)
{
- must_backtrack = true;
+ cost = INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]);
+ priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]);
+ if (cost > 0 && priority > priority_base)
+ {
+ cost += priority_base - priority;
+ INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = MAX (cost, 0);
+ }
}
}
+\f
+/* Returns a positive value if x is preferred; returns a negative value if
+ y is preferred. Should never return 0, since that will make the sort
+ unstable. */
-/* Remove the element with the highest priority from the ready list and
- return it. */
-
-HAIFA_INLINE static rtx
-ready_remove_first (struct ready_list *ready)
+static int
+rank_for_schedule (const void *x, const void *y)
{
- rtx t;
+ rtx tmp = *(const rtx *) y;
+ rtx tmp2 = *(const rtx *) x;
+ int tmp_class, tmp2_class;
+ int val, priority_val, info_val;
- gcc_assert (ready->n_ready);
- t = ready->vec[ready->first--];
- ready->n_ready--;
- if (DEBUG_INSN_P (t))
- ready->n_debug--;
- /* If the queue becomes empty, reset it. */
- if (ready->n_ready == 0)
- ready->first = ready->veclen - 1;
+ if (MAY_HAVE_DEBUG_INSNS)
+ {
+ /* Schedule debug insns as early as possible. */
+ if (DEBUG_INSN_P (tmp) && !DEBUG_INSN_P (tmp2))
+ return -1;
+ else if (!DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2))
+ return 1;
+ else if (DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2))
+ return INSN_LUID (tmp) - INSN_LUID (tmp2);
+ }
- gcc_assert (QUEUE_INDEX (t) == QUEUE_READY);
- QUEUE_INDEX (t) = QUEUE_NOWHERE;
+ /* The insn in a schedule group should be issued the first. */
+ if (flag_sched_group_heuristic &&
+ SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
+ return SCHED_GROUP_P (tmp2) ? 1 : -1;
+
+ /* Make sure that priority of TMP and TMP2 are initialized. */
+ gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
+
+ if (sched_pressure != SCHED_PRESSURE_NONE)
+ {
+ int diff;
+
+ /* Prefer insn whose scheduling results in the smallest register
+ pressure excess. */
+ if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp)
+ + insn_delay (tmp)
+ - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2)
+ - insn_delay (tmp2))))
+ return diff;
+ }
+
+ if (sched_pressure != SCHED_PRESSURE_NONE
+ && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var))
+ {
+ if (INSN_TICK (tmp) <= clock_var)
+ return -1;
+ else if (INSN_TICK (tmp2) <= clock_var)
+ return 1;
+ else
+ return INSN_TICK (tmp) - INSN_TICK (tmp2);
+ }
+
+ /* If we are doing backtracking in this schedule, prefer insns that
+ have forward dependencies with negative cost against an insn that
+ was already scheduled. */
+ if (current_sched_info->flags & DO_BACKTRACKING)
+ {
+ priority_val = FEEDS_BACKTRACK_INSN (tmp2) - FEEDS_BACKTRACK_INSN (tmp);
+ if (priority_val)
+ return priority_val;
+ }
+
+ /* Prefer insn with higher priority. */
+ priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
+
+ if (flag_sched_critical_path_heuristic && priority_val)
+ return priority_val;
+
+ /* Prefer speculative insn with greater dependencies weakness. */
+ if (flag_sched_spec_insn_heuristic && spec_info)
+ {
+ ds_t ds1, ds2;
+ dw_t dw1, dw2;
+ int dw;
+
+ ds1 = TODO_SPEC (tmp) & SPECULATIVE;
+ if (ds1)
+ dw1 = ds_weak (ds1);
+ else
+ dw1 = NO_DEP_WEAK;
+
+ ds2 = TODO_SPEC (tmp2) & SPECULATIVE;
+ if (ds2)
+ dw2 = ds_weak (ds2);
+ else
+ dw2 = NO_DEP_WEAK;
+
+ dw = dw2 - dw1;
+ if (dw > (NO_DEP_WEAK / 8) || dw < -(NO_DEP_WEAK / 8))
+ return dw;
+ }
+
+ info_val = (*current_sched_info->rank) (tmp, tmp2);
+ if(flag_sched_rank_heuristic && info_val)
+ return info_val;
+
+ /* Compare insns based on their relation to the last scheduled
+ non-debug insn. */
+ if (flag_sched_last_insn_heuristic && last_nondebug_scheduled_insn)
+ {
+ dep_t dep1;
+ dep_t dep2;
+ rtx last = last_nondebug_scheduled_insn;
+
+ /* Classify the instructions into three classes:
+ 1) Data dependent on last schedule insn.
+ 2) Anti/Output dependent on last scheduled insn.
+ 3) Independent of last scheduled insn, or has latency of one.
+ Choose the insn from the highest numbered class if different. */
+ dep1 = sd_find_dep_between (last, tmp, true);
+
+ if (dep1 == NULL || dep_cost (dep1) == 1)
+ tmp_class = 3;
+ else if (/* Data dependence. */
+ DEP_TYPE (dep1) == REG_DEP_TRUE)
+ tmp_class = 1;
+ else
+ tmp_class = 2;
+
+ dep2 = sd_find_dep_between (last, tmp2, true);
+
+ if (dep2 == NULL || dep_cost (dep2) == 1)
+ tmp2_class = 3;
+ else if (/* Data dependence. */
+ DEP_TYPE (dep2) == REG_DEP_TRUE)
+ tmp2_class = 1;
+ else
+ tmp2_class = 2;
+
+ if ((val = tmp2_class - tmp_class))
+ return val;
+ }
+
+ /* Prefer instructions that occur earlier in the model schedule. */
+ if (sched_pressure == SCHED_PRESSURE_MODEL)
+ {
+ int diff;
+
+ diff = model_index (tmp) - model_index (tmp2);
+ if (diff != 0)
+ return diff;
+ }
+
+ /* Prefer the insn which has more later insns that depend on it.
+ This gives the scheduler more freedom when scheduling later
+ instructions at the expense of added register pressure. */
+
+ val = (dep_list_size (tmp2, SD_LIST_FORW)
+ - dep_list_size (tmp, SD_LIST_FORW));
+
+ if (flag_sched_dep_count_heuristic && val != 0)
+ return val;
+
+ /* If insns are equally good, sort by INSN_LUID (original insn order),
+ so that we make the sort stable. This minimizes instruction movement,
+ thus minimizing sched's effect on debugging and cross-jumping. */
+ return INSN_LUID (tmp) - INSN_LUID (tmp2);
+}
+
+/* Resort the array A in which only element at index N may be out of order. */
+
+HAIFA_INLINE static void
+swap_sort (rtx *a, int n)
+{
+ rtx insn = a[n - 1];
+ int i = n - 2;
+
+ while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
+ {
+ a[i + 1] = a[i];
+ i -= 1;
+ }
+ a[i + 1] = insn;
+}
+
+/* Add INSN to the insn queue so that it can be executed at least
+ N_CYCLES after the currently executing insn. Preserve insns
+ chain for debugging purposes. REASON will be printed in debugging
+ output. */
+
+HAIFA_INLINE static void
+queue_insn (rtx insn, int n_cycles, const char *reason)
+{
+ int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
+ rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
+ int new_tick;
+
+ gcc_assert (n_cycles <= max_insn_queue_index);
+ gcc_assert (!DEBUG_INSN_P (insn));
+
+ insn_queue[next_q] = link;
+ q_size += 1;
+
+ if (sched_verbose >= 2)
+ {
+ fprintf (sched_dump, ";;\t\tReady-->Q: insn %s: ",
+ (*current_sched_info->print_insn) (insn, 0));
+
+ fprintf (sched_dump, "queued for %d cycles (%s).\n", n_cycles, reason);
+ }
+
+ QUEUE_INDEX (insn) = next_q;
+
+ if (current_sched_info->flags & DO_BACKTRACKING)
+ {
+ new_tick = clock_var + n_cycles;
+ if (INSN_TICK (insn) == INVALID_TICK || INSN_TICK (insn) < new_tick)
+ INSN_TICK (insn) = new_tick;
+
+ if (INSN_EXACT_TICK (insn) != INVALID_TICK
+ && INSN_EXACT_TICK (insn) < clock_var + n_cycles)
+ {
+ must_backtrack = true;
+ if (sched_verbose >= 2)
+ fprintf (sched_dump, ";;\t\tcausing a backtrack.\n");
+ }
+ }
+}
+
+/* Remove INSN from queue. */
+static void
+queue_remove (rtx insn)
+{
+ gcc_assert (QUEUE_INDEX (insn) >= 0);
+ remove_free_INSN_LIST_elem (insn, &insn_queue[QUEUE_INDEX (insn)]);
+ q_size--;
+ QUEUE_INDEX (insn) = QUEUE_NOWHERE;
+}
+
+/* Return a pointer to the bottom of the ready list, i.e. the insn
+ with the lowest priority. */
+
+rtx *
+ready_lastpos (struct ready_list *ready)
+{
+ gcc_assert (ready->n_ready >= 1);
+ return ready->vec + ready->first - ready->n_ready + 1;
+}
+
+/* Add an element INSN to the ready list so that it ends up with the
+ lowest/highest priority depending on FIRST_P. */
+
+HAIFA_INLINE static void
+ready_add (struct ready_list *ready, rtx insn, bool first_p)
+{
+ if (!first_p)
+ {
+ if (ready->first == ready->n_ready)
+ {
+ memmove (ready->vec + ready->veclen - ready->n_ready,
+ ready_lastpos (ready),
+ ready->n_ready * sizeof (rtx));
+ ready->first = ready->veclen - 1;
+ }
+ ready->vec[ready->first - ready->n_ready] = insn;
+ }
+ else
+ {
+ if (ready->first == ready->veclen - 1)
+ {
+ if (ready->n_ready)
+ /* ready_lastpos() fails when called with (ready->n_ready == 0). */
+ memmove (ready->vec + ready->veclen - ready->n_ready - 1,
+ ready_lastpos (ready),
+ ready->n_ready * sizeof (rtx));
+ ready->first = ready->veclen - 2;
+ }
+ ready->vec[++(ready->first)] = insn;
+ }
+
+ ready->n_ready++;
+ if (DEBUG_INSN_P (insn))
+ ready->n_debug++;
+
+ gcc_assert (QUEUE_INDEX (insn) != QUEUE_READY);
+ QUEUE_INDEX (insn) = QUEUE_READY;
+
+ if (INSN_EXACT_TICK (insn) != INVALID_TICK
+ && INSN_EXACT_TICK (insn) < clock_var)
+ {
+ must_backtrack = true;
+ }
+}
+
+/* Remove the element with the highest priority from the ready list and
+ return it. */
+
+HAIFA_INLINE static rtx
+ready_remove_first (struct ready_list *ready)
+{
+ rtx t;
+
+ gcc_assert (ready->n_ready);
+ t = ready->vec[ready->first--];
+ ready->n_ready--;
+ if (DEBUG_INSN_P (t))
+ ready->n_debug--;
+ /* If the queue becomes empty, reset it. */
+ if (ready->n_ready == 0)
+ ready->first = ready->veclen - 1;
+
+ gcc_assert (QUEUE_INDEX (t) == QUEUE_READY);
+ QUEUE_INDEX (t) = QUEUE_NOWHERE;
+
+ return t;
+}
+
+/* The following code implements multi-pass scheduling for the first
+ cycle. In other words, we will try to choose ready insn which
+ permits to start maximum number of insns on the same cycle. */
+
+/* Return a pointer to the element INDEX from the ready. INDEX for
+ insn with the highest priority is 0, and the lowest priority has
+ N_READY - 1. */
+
+rtx
+ready_element (struct ready_list *ready, int index)
+{
+ gcc_assert (ready->n_ready && index < ready->n_ready);
+
+ return ready->vec[ready->first - index];
+}
+
+/* Remove the element INDEX from the ready list and return it. INDEX
+ for insn with the highest priority is 0, and the lowest priority
+ has N_READY - 1. */
+
+HAIFA_INLINE static rtx
+ready_remove (struct ready_list *ready, int index)
+{
+ rtx t;
+ int i;
+
+ if (index == 0)
+ return ready_remove_first (ready);
+ gcc_assert (ready->n_ready && index < ready->n_ready);
+ t = ready->vec[ready->first - index];
+ ready->n_ready--;
+ if (DEBUG_INSN_P (t))
+ ready->n_debug--;
+ for (i = index; i < ready->n_ready; i++)
+ ready->vec[ready->first - i] = ready->vec[ready->first - i - 1];
+ QUEUE_INDEX (t) = QUEUE_NOWHERE;
+ return t;
+}
+
+/* Remove INSN from the ready list. */
+static void
+ready_remove_insn (rtx insn)
+{
+ int i;
+
+ for (i = 0; i < readyp->n_ready; i++)
+ if (ready_element (readyp, i) == insn)
+ {
+ ready_remove (readyp, i);
+ return;
+ }
+ gcc_unreachable ();
+}
+
+/* Sort the ready list READY by ascending priority, using the SCHED_SORT
+ macro. */
+
+void
+ready_sort (struct ready_list *ready)
+{
+ int i;
+ rtx *first = ready_lastpos (ready);
+
+ if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+ {
+ for (i = 0; i < ready->n_ready; i++)
+ if (!DEBUG_INSN_P (first[i]))
+ setup_insn_reg_pressure_info (first[i]);
+ }
+ if (sched_pressure == SCHED_PRESSURE_MODEL
+ && model_curr_point < model_num_insns)
+ model_set_excess_costs (first, ready->n_ready);
+ SCHED_SORT (first, ready->n_ready);
+}
+
+/* PREV is an insn that is ready to execute. Adjust its priority if that
+ will help shorten or lengthen register lifetimes as appropriate. Also
+ provide a hook for the target to tweak itself. */
+
+HAIFA_INLINE static void
+adjust_priority (rtx prev)
+{
+ /* ??? There used to be code here to try and estimate how an insn
+ affected register lifetimes, but it did it by looking at REG_DEAD
+ notes, which we removed in schedule_region. Nor did it try to
+ take into account register pressure or anything useful like that.
+
+ Revisit when we have a machine model to work with and not before. */
+
+ if (targetm.sched.adjust_priority)
+ INSN_PRIORITY (prev) =
+ targetm.sched.adjust_priority (prev, INSN_PRIORITY (prev));
+}
+
+/* Advance DFA state STATE on one cycle. */
+void
+advance_state (state_t state)
+{
+ if (targetm.sched.dfa_pre_advance_cycle)
+ targetm.sched.dfa_pre_advance_cycle ();
+
+ if (targetm.sched.dfa_pre_cycle_insn)
+ state_transition (state,
+ targetm.sched.dfa_pre_cycle_insn ());
+
+ state_transition (state, NULL);
+
+ if (targetm.sched.dfa_post_cycle_insn)
+ state_transition (state,
+ targetm.sched.dfa_post_cycle_insn ());
+
+ if (targetm.sched.dfa_post_advance_cycle)
+ targetm.sched.dfa_post_advance_cycle ();
+}
+
+/* Advance time on one cycle. */
+HAIFA_INLINE static void
+advance_one_cycle (void)
+{
+ advance_state (curr_state);
+ if (sched_verbose >= 6)
+ fprintf (sched_dump, ";;\tAdvanced a state.\n");
+}
+
+/* Update register pressure after scheduling INSN. */
+static void
+update_register_pressure (rtx insn)
+{
+ struct reg_use_data *use;
+ struct reg_set_data *set;
+
+ gcc_checking_assert (!DEBUG_INSN_P (insn));
+
+ for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
+ if (dying_use_p (use))
+ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
+ use->regno, false);
+ for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set)
+ mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
+ set->regno, true);
+}
+
+/* Set up or update (if UPDATE_P) max register pressure (see its
+ meaning in sched-int.h::_haifa_insn_data) for all current BB insns
+ after insn AFTER. */
+static void
+setup_insn_max_reg_pressure (rtx after, bool update_p)
+{
+ int i, p;
+ bool eq_p;
+ rtx insn;
+ static int max_reg_pressure[N_REG_CLASSES];
+
+ save_reg_pressure ();
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ max_reg_pressure[ira_pressure_classes[i]]
+ = curr_reg_pressure[ira_pressure_classes[i]];
+ for (insn = NEXT_INSN (after);
+ insn != NULL_RTX && ! BARRIER_P (insn)
+ && BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (after);
+ insn = NEXT_INSN (insn))
+ if (NONDEBUG_INSN_P (insn))
+ {
+ eq_p = true;
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ {
+ p = max_reg_pressure[ira_pressure_classes[i]];
+ if (INSN_MAX_REG_PRESSURE (insn)[i] != p)
+ {
+ eq_p = false;
+ INSN_MAX_REG_PRESSURE (insn)[i]
+ = max_reg_pressure[ira_pressure_classes[i]];
+ }
+ }
+ if (update_p && eq_p)
+ break;
+ update_register_pressure (insn);
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ if (max_reg_pressure[ira_pressure_classes[i]]
+ < curr_reg_pressure[ira_pressure_classes[i]])
+ max_reg_pressure[ira_pressure_classes[i]]
+ = curr_reg_pressure[ira_pressure_classes[i]];
+ }
+ restore_reg_pressure ();
+}
+
+/* Update the current register pressure after scheduling INSN. Update
+ also max register pressure for unscheduled insns of the current
+ BB. */
+static void
+update_reg_and_insn_max_reg_pressure (rtx insn)
+{
+ int i;
+ int before[N_REG_CLASSES];
+
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ before[i] = curr_reg_pressure[ira_pressure_classes[i]];
+ update_register_pressure (insn);
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ if (curr_reg_pressure[ira_pressure_classes[i]] != before[i])
+ break;
+ if (i < ira_pressure_classes_num)
+ setup_insn_max_reg_pressure (insn, true);
+}
+
+/* Set up register pressure at the beginning of basic block BB whose
+ insns starting after insn AFTER. Set up also max register pressure
+ for all insns of the basic block. */
+void
+sched_setup_bb_reg_pressure_info (basic_block bb, rtx after)
+{
+ gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED);
+ initiate_bb_reg_pressure_info (bb);
+ setup_insn_max_reg_pressure (after, false);
+}
+\f
+/* If doing predication while scheduling, verify whether INSN, which
+ has just been scheduled, clobbers the conditions of any
+ instructions that must be predicated in order to break their
+ dependencies. If so, remove them from the queues so that they will
+ only be scheduled once their control dependency is resolved. */
+
+static void
+check_clobbered_conditions (rtx insn)
+{
+ HARD_REG_SET t;
+ int i;
+
+ if ((current_sched_info->flags & DO_PREDICATION) == 0)
+ return;
+
+ find_all_hard_reg_sets (insn, &t);
+
+ restart:
+ for (i = 0; i < ready.n_ready; i++)
+ {
+ rtx x = ready_element (&ready, i);
+ if (TODO_SPEC (x) == DEP_CONTROL && cond_clobbered_p (x, t))
+ {
+ ready_remove_insn (x);
+ goto restart;
+ }
+ }
+ for (i = 0; i <= max_insn_queue_index; i++)
+ {
+ rtx link;
+ int q = NEXT_Q_AFTER (q_ptr, i);
+
+ restart_queue:
+ for (link = insn_queue[q]; link; link = XEXP (link, 1))
+ {
+ rtx x = XEXP (link, 0);
+ if (TODO_SPEC (x) == DEP_CONTROL && cond_clobbered_p (x, t))
+ {
+ queue_remove (x);
+ goto restart_queue;
+ }
+ }
+ }
+}
+\f
+/* Return (in order):
+
+ - positive if INSN adversely affects the pressure on one
+ register class
+
+ - negative if INSN reduces the pressure on one register class
+
+ - 0 if INSN doesn't affect the pressure on any register class. */
+
+static int
+model_classify_pressure (struct model_insn_info *insn)
+{
+ struct reg_pressure_data *reg_pressure;
+ int death[N_REG_CLASSES];
+ int pci, cl, sum;
+
+ calculate_reg_deaths (insn->insn, death);
+ reg_pressure = INSN_REG_PRESSURE (insn->insn);
+ sum = 0;
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ if (death[cl] < reg_pressure[pci].set_increase)
+ return 1;
+ sum += reg_pressure[pci].set_increase - death[cl];
+ }
+ return sum;
+}
+
+/* Return true if INSN1 should come before INSN2 in the model schedule. */
+
+static int
+model_order_p (struct model_insn_info *insn1, struct model_insn_info *insn2)
+{
+ unsigned int height1, height2;
+ unsigned int priority1, priority2;
+
+ /* Prefer instructions with a higher model priority. */
+ if (insn1->model_priority != insn2->model_priority)
+ return insn1->model_priority > insn2->model_priority;
+
+ /* Combine the length of the longest path of satisfied true dependencies
+ that leads to each instruction (depth) with the length of the longest
+ path of any dependencies that leads from the instruction (alap).
+ Prefer instructions with the greatest combined length. If the combined
+ lengths are equal, prefer instructions with the greatest depth.
+
+ The idea is that, if we have a set S of "equal" instructions that each
+ have ALAP value X, and we pick one such instruction I, any true-dependent
+ successors of I that have ALAP value X - 1 should be preferred over S.
+ This encourages the schedule to be "narrow" rather than "wide".
+ However, if I is a low-priority instruction that we decided to
+ schedule because of its model_classify_pressure, and if there
+ is a set of higher-priority instructions T, the aforementioned
+ successors of I should not have the edge over T. */
+ height1 = insn1->depth + insn1->alap;
+ height2 = insn2->depth + insn2->alap;
+ if (height1 != height2)
+ return height1 > height2;
+ if (insn1->depth != insn2->depth)
+ return insn1->depth > insn2->depth;
+
+ /* We have no real preference between INSN1 an INSN2 as far as attempts
+ to reduce pressure go. Prefer instructions with higher priorities. */
+ priority1 = INSN_PRIORITY (insn1->insn);
+ priority2 = INSN_PRIORITY (insn2->insn);
+ if (priority1 != priority2)
+ return priority1 > priority2;
+
+ /* Use the original rtl sequence as a tie-breaker. */
+ return insn1 < insn2;
+}
+
+/* Add INSN to the model worklist immediately after PREV. Add it to the
+ beginning of the list if PREV is null. */
+
+static void
+model_add_to_worklist_at (struct model_insn_info *insn,
+ struct model_insn_info *prev)
+{
+ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_NOWHERE);
+ QUEUE_INDEX (insn->insn) = QUEUE_READY;
+
+ insn->prev = prev;
+ if (prev)
+ {
+ insn->next = prev->next;
+ prev->next = insn;
+ }
+ else
+ {
+ insn->next = model_worklist;
+ model_worklist = insn;
+ }
+ if (insn->next)
+ insn->next->prev = insn;
+}
+
+/* Remove INSN from the model worklist. */
+
+static void
+model_remove_from_worklist (struct model_insn_info *insn)
+{
+ gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_READY);
+ QUEUE_INDEX (insn->insn) = QUEUE_NOWHERE;
+
+ if (insn->prev)
+ insn->prev->next = insn->next;
+ else
+ model_worklist = insn->next;
+ if (insn->next)
+ insn->next->prev = insn->prev;
+}
+
+/* Add INSN to the model worklist. Start looking for a suitable position
+ between neighbors PREV and NEXT, testing at most MAX_SCHED_READY_INSNS
+ insns either side. A null PREV indicates the beginning of the list and
+ a null NEXT indicates the end. */
+
+static void
+model_add_to_worklist (struct model_insn_info *insn,
+ struct model_insn_info *prev,
+ struct model_insn_info *next)
+{
+ int count;
+
+ count = MAX_SCHED_READY_INSNS;
+ if (count > 0 && prev && model_order_p (insn, prev))
+ do
+ {
+ count--;
+ prev = prev->prev;
+ }
+ while (count > 0 && prev && model_order_p (insn, prev));
+ else
+ while (count > 0 && next && model_order_p (next, insn))
+ {
+ count--;
+ prev = next;
+ next = next->next;
+ }
+ model_add_to_worklist_at (insn, prev);
+}
+
+/* INSN may now have a higher priority (in the model_order_p sense)
+ than before. Move it up the worklist if necessary. */
+
+static void
+model_promote_insn (struct model_insn_info *insn)
+{
+ struct model_insn_info *prev;
+ int count;
+
+ prev = insn->prev;
+ count = MAX_SCHED_READY_INSNS;
+ while (count > 0 && prev && model_order_p (insn, prev))
+ {
+ count--;
+ prev = prev->prev;
+ }
+ if (prev != insn->prev)
+ {
+ model_remove_from_worklist (insn);
+ model_add_to_worklist_at (insn, prev);
+ }
+}
+
+/* Add INSN to the end of the model schedule. */
+
+static void
+model_add_to_schedule (rtx insn)
+{
+ unsigned int point;
+
+ gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE);
+ QUEUE_INDEX (insn) = QUEUE_SCHEDULED;
+
+ point = VEC_length (rtx, model_schedule);
+ VEC_quick_push (rtx, model_schedule, insn);
+ INSN_MODEL_INDEX (insn) = point + 1;
+}
+
+/* Analyze the instructions that are to be scheduled, setting up
+ MODEL_INSN_INFO (...) and model_num_insns accordingly. Add ready
+ instructions to model_worklist. */
+
+static void
+model_analyze_insns (void)
+{
+ rtx start, end, iter;
+ sd_iterator_def sd_it;
+ dep_t dep;
+ struct model_insn_info *insn, *con;
+
+ model_num_insns = 0;
+ start = PREV_INSN (current_sched_info->next_tail);
+ end = current_sched_info->prev_head;
+ for (iter = start; iter != end; iter = PREV_INSN (iter))
+ if (NONDEBUG_INSN_P (iter))
+ {
+ insn = MODEL_INSN_INFO (iter);
+ insn->insn = iter;
+ FOR_EACH_DEP (iter, SD_LIST_FORW, sd_it, dep)
+ {
+ con = MODEL_INSN_INFO (DEP_CON (dep));
+ if (con->insn && insn->alap < con->alap + 1)
+ insn->alap = con->alap + 1;
+ }
+
+ insn->old_queue = QUEUE_INDEX (iter);
+ QUEUE_INDEX (iter) = QUEUE_NOWHERE;
+
+ insn->unscheduled_preds = dep_list_size (iter, SD_LIST_HARD_BACK);
+ if (insn->unscheduled_preds == 0)
+ model_add_to_worklist (insn, NULL, model_worklist);
+
+ model_num_insns++;
+ }
+}
+
+/* The global state describes the register pressure at the start of the
+ model schedule. Initialize GROUP accordingly. */
+
+static void
+model_init_pressure_group (struct model_pressure_group *group)
+{
+ int pci, cl;
- return t;
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ group->limits[pci].pressure = curr_reg_pressure[cl];
+ group->limits[pci].point = 0;
+ }
+ /* Use index model_num_insns to record the state after the last
+ instruction in the model schedule. */
+ group->model = XNEWVEC (struct model_pressure_data,
+ (model_num_insns + 1) * ira_pressure_classes_num);
}
-/* The following code implements multi-pass scheduling for the first
- cycle. In other words, we will try to choose ready insn which
- permits to start maximum number of insns on the same cycle. */
-
-/* Return a pointer to the element INDEX from the ready. INDEX for
- insn with the highest priority is 0, and the lowest priority has
- N_READY - 1. */
+/* Record that MODEL_REF_PRESSURE (GROUP, POINT, PCI) is PRESSURE.
+ Update the maximum pressure for the whole schedule. */
-rtx
-ready_element (struct ready_list *ready, int index)
+static void
+model_record_pressure (struct model_pressure_group *group,
+ int point, int pci, int pressure)
{
- gcc_assert (ready->n_ready && index < ready->n_ready);
-
- return ready->vec[ready->first - index];
+ MODEL_REF_PRESSURE (group, point, pci) = pressure;
+ if (group->limits[pci].pressure < pressure)
+ {
+ group->limits[pci].pressure = pressure;
+ group->limits[pci].point = point;
+ }
}
-/* Remove the element INDEX from the ready list and return it. INDEX
- for insn with the highest priority is 0, and the lowest priority
- has N_READY - 1. */
+/* INSN has just been added to the end of the model schedule. Record its
+ register-pressure information. */
-HAIFA_INLINE static rtx
-ready_remove (struct ready_list *ready, int index)
+static void
+model_record_pressures (struct model_insn_info *insn)
{
- rtx t;
- int i;
+ struct reg_pressure_data *reg_pressure;
+ int point, pci, cl, delta;
+ int death[N_REG_CLASSES];
- if (index == 0)
- return ready_remove_first (ready);
- gcc_assert (ready->n_ready && index < ready->n_ready);
- t = ready->vec[ready->first - index];
- ready->n_ready--;
- if (DEBUG_INSN_P (t))
- ready->n_debug--;
- for (i = index; i < ready->n_ready; i++)
- ready->vec[ready->first - i] = ready->vec[ready->first - i - 1];
- QUEUE_INDEX (t) = QUEUE_NOWHERE;
- return t;
+ point = model_index (insn->insn);
+ if (sched_verbose >= 2)
+ {
+ char buf[2048];
+
+ if (point == 0)
+ {
+ fprintf (sched_dump, "\n;;\tModel schedule:\n;;\n");
+ fprintf (sched_dump, ";;\t| idx insn | mpri hght dpth prio |\n");
+ }
+ print_pattern (buf, PATTERN (insn->insn), 0);
+ fprintf (sched_dump, ";;\t| %3d %4d | %4d %4d %4d %4d | %-30s ",
+ point, INSN_UID (insn->insn), insn->model_priority,
+ insn->depth + insn->alap, insn->depth,
+ INSN_PRIORITY (insn->insn), buf);
+ }
+ calculate_reg_deaths (insn->insn, death);
+ reg_pressure = INSN_REG_PRESSURE (insn->insn);
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ delta = reg_pressure[pci].set_increase - death[cl];
+ if (sched_verbose >= 2)
+ fprintf (sched_dump, " %s:[%d,%+d]", reg_class_names[cl],
+ curr_reg_pressure[cl], delta);
+ model_record_pressure (&model_before_pressure, point, pci,
+ curr_reg_pressure[cl]);
+ }
+ if (sched_verbose >= 2)
+ fprintf (sched_dump, "\n");
}
-/* Remove INSN from the ready list. */
+/* All instructions have been added to the model schedule. Record the
+ final register pressure in GROUP and set up all MODEL_MAX_PRESSUREs. */
+
static void
-ready_remove_insn (rtx insn)
+model_record_final_pressures (struct model_pressure_group *group)
{
- int i;
+ int point, pci, max_pressure, ref_pressure, cl;
- for (i = 0; i < readyp->n_ready; i++)
- if (ready_element (readyp, i) == insn)
- {
- ready_remove (readyp, i);
- return;
- }
- gcc_unreachable ();
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ /* Record the final pressure for this class. */
+ cl = ira_pressure_classes[pci];
+ point = model_num_insns;
+ ref_pressure = curr_reg_pressure[cl];
+ model_record_pressure (group, point, pci, ref_pressure);
+
+ /* Record the original maximum pressure. */
+ group->limits[pci].orig_pressure = group->limits[pci].pressure;
+
+ /* Update the MODEL_MAX_PRESSURE for every point of the schedule. */
+ max_pressure = ref_pressure;
+ MODEL_MAX_PRESSURE (group, point, pci) = max_pressure;
+ while (point > 0)
+ {
+ point--;
+ ref_pressure = MODEL_REF_PRESSURE (group, point, pci);
+ max_pressure = MAX (max_pressure, ref_pressure);
+ MODEL_MAX_PRESSURE (group, point, pci) = max_pressure;
+ }
+ }
}
-/* Sort the ready list READY by ascending priority, using the SCHED_SORT
- macro. */
+/* Update all successors of INSN, given that INSN has just been scheduled. */
-void
-ready_sort (struct ready_list *ready)
+static void
+model_add_successors_to_worklist (struct model_insn_info *insn)
{
- int i;
- rtx *first = ready_lastpos (ready);
+ sd_iterator_def sd_it;
+ struct model_insn_info *con;
+ dep_t dep;
- if (sched_pressure_p)
+ FOR_EACH_DEP (insn->insn, SD_LIST_FORW, sd_it, dep)
{
- for (i = 0; i < ready->n_ready; i++)
- if (!DEBUG_INSN_P (first[i]))
- setup_insn_reg_pressure_info (first[i]);
+ con = MODEL_INSN_INFO (DEP_CON (dep));
+ /* Ignore debug instructions, and instructions from other blocks. */
+ if (con->insn)
+ {
+ con->unscheduled_preds--;
+
+ /* Update the depth field of each true-dependent successor.
+ Increasing the depth gives them a higher priority than
+ before. */
+ if (DEP_TYPE (dep) == REG_DEP_TRUE && con->depth < insn->depth + 1)
+ {
+ con->depth = insn->depth + 1;
+ if (QUEUE_INDEX (con->insn) == QUEUE_READY)
+ model_promote_insn (con);
+ }
+
+ /* If this is a true dependency, or if there are no remaining
+ dependencies for CON (meaning that CON only had non-true
+ dependencies), make sure that CON is on the worklist.
+ We don't bother otherwise because it would tend to fill the
+ worklist with a lot of low-priority instructions that are not
+ yet ready to issue. */
+ if ((con->depth > 0 || con->unscheduled_preds == 0)
+ && QUEUE_INDEX (con->insn) == QUEUE_NOWHERE)
+ model_add_to_worklist (con, insn, insn->next);
+ }
}
- SCHED_SORT (first, ready->n_ready);
}
-/* PREV is an insn that is ready to execute. Adjust its priority if that
- will help shorten or lengthen register lifetimes as appropriate. Also
- provide a hook for the target to tweak itself. */
+/* Give INSN a higher priority than any current instruction, then give
+ unscheduled predecessors of INSN a higher priority still. If any of
+ those predecessors are not on the model worklist, do the same for its
+ predecessors, and so on. */
-HAIFA_INLINE static void
-adjust_priority (rtx prev)
+static void
+model_promote_predecessors (struct model_insn_info *insn)
{
- /* ??? There used to be code here to try and estimate how an insn
- affected register lifetimes, but it did it by looking at REG_DEAD
- notes, which we removed in schedule_region. Nor did it try to
- take into account register pressure or anything useful like that.
+ struct model_insn_info *pro, *first;
+ sd_iterator_def sd_it;
+ dep_t dep;
- Revisit when we have a machine model to work with and not before. */
+ if (sched_verbose >= 7)
+ fprintf (sched_dump, ";;\t+--- priority of %d = %d, priority of",
+ INSN_UID (insn->insn), model_next_priority);
+ insn->model_priority = model_next_priority++;
+ model_remove_from_worklist (insn);
+ model_add_to_worklist_at (insn, NULL);
- if (targetm.sched.adjust_priority)
- INSN_PRIORITY (prev) =
- targetm.sched.adjust_priority (prev, INSN_PRIORITY (prev));
+ first = NULL;
+ for (;;)
+ {
+ FOR_EACH_DEP (insn->insn, SD_LIST_HARD_BACK, sd_it, dep)
+ {
+ pro = MODEL_INSN_INFO (DEP_PRO (dep));
+ /* The first test is to ignore debug instructions, and instructions
+ from other blocks. */
+ if (pro->insn
+ && pro->model_priority != model_next_priority
+ && QUEUE_INDEX (pro->insn) != QUEUE_SCHEDULED)
+ {
+ pro->model_priority = model_next_priority;
+ if (sched_verbose >= 7)
+ fprintf (sched_dump, " %d", INSN_UID (pro->insn));
+ if (QUEUE_INDEX (pro->insn) == QUEUE_READY)
+ {
+ /* PRO is already in the worklist, but it now has
+ a higher priority than before. Move it at the
+ appropriate place. */
+ model_remove_from_worklist (pro);
+ model_add_to_worklist (pro, NULL, model_worklist);
+ }
+ else
+ {
+ /* PRO isn't in the worklist. Recursively process
+ its predecessors until we find one that is. */
+ pro->next = first;
+ first = pro;
+ }
+ }
+ }
+ if (!first)
+ break;
+ insn = first;
+ first = insn->next;
+ }
+ if (sched_verbose >= 7)
+ fprintf (sched_dump, " = %d\n", model_next_priority);
+ model_next_priority++;
}
-/* Advance DFA state STATE on one cycle. */
-void
-advance_state (state_t state)
+/* Pick one instruction from model_worklist and process it. */
+
+static void
+model_choose_insn (void)
{
- if (targetm.sched.dfa_pre_advance_cycle)
- targetm.sched.dfa_pre_advance_cycle ();
+ struct model_insn_info *insn, *fallback;
+ int count;
- if (targetm.sched.dfa_pre_cycle_insn)
- state_transition (state,
- targetm.sched.dfa_pre_cycle_insn ());
+ if (sched_verbose >= 7)
+ {
+ fprintf (sched_dump, ";;\t+--- worklist:\n");
+ insn = model_worklist;
+ count = MAX_SCHED_READY_INSNS;
+ while (count > 0 && insn)
+ {
+ fprintf (sched_dump, ";;\t+--- %d [%d, %d, %d, %d]\n",
+ INSN_UID (insn->insn), insn->model_priority,
+ insn->depth + insn->alap, insn->depth,
+ INSN_PRIORITY (insn->insn));
+ count--;
+ insn = insn->next;
+ }
+ }
- state_transition (state, NULL);
+ /* Look for a ready instruction whose model_classify_priority is zero
+ or negative, picking the highest-priority one. Adding such an
+ instruction to the schedule now should do no harm, and may actually
+ do some good.
- if (targetm.sched.dfa_post_cycle_insn)
- state_transition (state,
- targetm.sched.dfa_post_cycle_insn ());
+ Failing that, see whether there is an instruction with the highest
+ extant model_priority that is not yet ready, but which would reduce
+ pressure if it became ready. This is designed to catch cases like:
- if (targetm.sched.dfa_post_advance_cycle)
- targetm.sched.dfa_post_advance_cycle ();
+ (set (mem (reg R1)) (reg R2))
+
+ where the instruction is the last remaining use of R1 and where the
+ value of R2 is not yet available (or vice versa). The death of R1
+ means that this instruction already reduces pressure. It is of
+ course possible that the computation of R2 involves other registers
+ that are hard to kill, but such cases are rare enough for this
+ heuristic to be a win in general.
+
+ Failing that, just pick the highest-priority instruction in the
+ worklist. */
+ count = MAX_SCHED_READY_INSNS;
+ insn = model_worklist;
+ fallback = 0;
+ for (;;)
+ {
+ if (count == 0 || !insn)
+ {
+ insn = fallback ? fallback : model_worklist;
+ break;
+ }
+ if (insn->unscheduled_preds)
+ {
+ if (model_worklist->model_priority == insn->model_priority
+ && !fallback
+ && model_classify_pressure (insn) < 0)
+ fallback = insn;
+ }
+ else
+ {
+ if (model_classify_pressure (insn) <= 0)
+ break;
+ }
+ count--;
+ insn = insn->next;
+ }
+
+ if (sched_verbose >= 7 && insn != model_worklist)
+ {
+ if (insn->unscheduled_preds)
+ fprintf (sched_dump, ";;\t+--- promoting insn %d, with dependencies\n",
+ INSN_UID (insn->insn));
+ else
+ fprintf (sched_dump, ";;\t+--- promoting insn %d, which is ready\n",
+ INSN_UID (insn->insn));
+ }
+ if (insn->unscheduled_preds)
+ /* INSN isn't yet ready to issue. Give all its predecessors the
+ highest priority. */
+ model_promote_predecessors (insn);
+ else
+ {
+ /* INSN is ready. Add it to the end of model_schedule and
+ process its successors. */
+ model_add_successors_to_worklist (insn);
+ model_remove_from_worklist (insn);
+ model_add_to_schedule (insn->insn);
+ model_record_pressures (insn);
+ update_register_pressure (insn->insn);
+ }
}
-/* Advance time on one cycle. */
-HAIFA_INLINE static void
-advance_one_cycle (void)
+/* Restore all QUEUE_INDEXs to the values that they had before
+ model_start_schedule was called. */
+
+static void
+model_reset_queue_indices (void)
{
- advance_state (curr_state);
- if (sched_verbose >= 6)
- fprintf (sched_dump, ";;\tAdvanced a state.\n");
+ unsigned int i;
+ rtx insn;
+
+ FOR_EACH_VEC_ELT (rtx, model_schedule, i, insn)
+ QUEUE_INDEX (insn) = MODEL_INSN_INFO (insn)->old_queue;
}
-/* Update register pressure after scheduling INSN. */
+/* We have calculated the model schedule and spill costs. Print a summary
+ to sched_dump. */
+
static void
-update_register_pressure (rtx insn)
+model_dump_pressure_summary (void)
{
- struct reg_use_data *use;
- struct reg_set_data *set;
+ int pci, cl;
- gcc_checking_assert (!DEBUG_INSN_P (insn));
-
- for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
- if (dying_use_p (use) && bitmap_bit_p (curr_reg_live, use->regno))
- mark_regno_birth_or_death (use->regno, false);
- for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set)
- mark_regno_birth_or_death (set->regno, true);
+ fprintf (sched_dump, ";; Pressure summary:");
+ for (pci = 0; pci < ira_pressure_classes_num; pci++)
+ {
+ cl = ira_pressure_classes[pci];
+ fprintf (sched_dump, " %s:%d", reg_class_names[cl],
+ model_before_pressure.limits[pci].pressure);
+ }
+ fprintf (sched_dump, "\n\n");
}
-/* Set up or update (if UPDATE_P) max register pressure (see its
- meaning in sched-int.h::_haifa_insn_data) for all current BB insns
- after insn AFTER. */
+/* Initialize the SCHED_PRESSURE_MODEL information for the current
+ scheduling region. */
+
static void
-setup_insn_max_reg_pressure (rtx after, bool update_p)
+model_start_schedule (void)
{
- int i, p;
- bool eq_p;
- rtx insn;
- static int max_reg_pressure[N_REG_CLASSES];
+ basic_block bb;
- save_reg_pressure ();
- for (i = 0; i < ira_pressure_classes_num; i++)
- max_reg_pressure[ira_pressure_classes[i]]
- = curr_reg_pressure[ira_pressure_classes[i]];
- for (insn = NEXT_INSN (after);
- insn != NULL_RTX && ! BARRIER_P (insn)
- && BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (after);
- insn = NEXT_INSN (insn))
- if (NONDEBUG_INSN_P (insn))
- {
- eq_p = true;
- for (i = 0; i < ira_pressure_classes_num; i++)
- {
- p = max_reg_pressure[ira_pressure_classes[i]];
- if (INSN_MAX_REG_PRESSURE (insn)[i] != p)
- {
- eq_p = false;
- INSN_MAX_REG_PRESSURE (insn)[i]
- = max_reg_pressure[ira_pressure_classes[i]];
- }
- }
- if (update_p && eq_p)
- break;
- update_register_pressure (insn);
- for (i = 0; i < ira_pressure_classes_num; i++)
- if (max_reg_pressure[ira_pressure_classes[i]]
- < curr_reg_pressure[ira_pressure_classes[i]])
- max_reg_pressure[ira_pressure_classes[i]]
- = curr_reg_pressure[ira_pressure_classes[i]];
- }
- restore_reg_pressure ();
+ model_next_priority = 1;
+ model_schedule = VEC_alloc (rtx, heap, sched_max_luid);
+ model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid);
+
+ bb = BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head));
+ initiate_reg_pressure_info (df_get_live_in (bb));
+
+ model_analyze_insns ();
+ model_init_pressure_group (&model_before_pressure);
+ while (model_worklist)
+ model_choose_insn ();
+ gcc_assert (model_num_insns == (int) VEC_length (rtx, model_schedule));
+ if (sched_verbose >= 2)
+ fprintf (sched_dump, "\n");
+
+ model_record_final_pressures (&model_before_pressure);
+ model_reset_queue_indices ();
+
+ XDELETEVEC (model_insns);
+
+ model_curr_point = 0;
+ initiate_reg_pressure_info (df_get_live_in (bb));
+ if (sched_verbose >= 1)
+ model_dump_pressure_summary ();
}
-/* Update the current register pressure after scheduling INSN. Update
- also max register pressure for unscheduled insns of the current
- BB. */
+/* Free the information associated with GROUP. */
+
static void
-update_reg_and_insn_max_reg_pressure (rtx insn)
+model_finalize_pressure_group (struct model_pressure_group *group)
{
- int i;
- int before[N_REG_CLASSES];
-
- for (i = 0; i < ira_pressure_classes_num; i++)
- before[i] = curr_reg_pressure[ira_pressure_classes[i]];
- update_register_pressure (insn);
- for (i = 0; i < ira_pressure_classes_num; i++)
- if (curr_reg_pressure[ira_pressure_classes[i]] != before[i])
- break;
- if (i < ira_pressure_classes_num)
- setup_insn_max_reg_pressure (insn, true);
+ XDELETEVEC (group->model);
}
-/* Set up register pressure at the beginning of basic block BB whose
- insns starting after insn AFTER. Set up also max register pressure
- for all insns of the basic block. */
-void
-sched_setup_bb_reg_pressure_info (basic_block bb, rtx after)
+/* Free the information created by model_start_schedule. */
+
+static void
+model_end_schedule (void)
{
- gcc_assert (sched_pressure_p);
- initiate_bb_reg_pressure_info (bb);
- setup_insn_max_reg_pressure (after, false);
+ model_finalize_pressure_group (&model_before_pressure);
+ VEC_free (rtx, heap, model_schedule);
}
\f
/* A structure that holds local state for the loop in schedule_block. */
/* True if a shadow insn has been scheduled in the current cycle, which
means that no more normal insns can be issued. */
bool shadows_only_p;
+ /* True if we're winding down a modulo schedule, which means that we only
+ issue insns with INSN_EXACT_TICK set. */
+ bool modulo_epilogue;
/* Initialized with the machine's issue rate every cycle, and updated
by calls to the variable_issue hook. */
int can_issue_more;
reg_class_names[ira_pressure_classes[i]],
pressure_info[i].set_increase, pressure_info[i].change);
}
+ if (sched_pressure == SCHED_PRESSURE_MODEL
+ && model_curr_point < model_num_insns
+ && model_index (insn) == model_curr_point)
+ fprintf (sched_dump, ":model %d", model_curr_point);
fputc ('\n', sched_dump);
}
- if (sched_pressure_p && !DEBUG_INSN_P (insn))
+ if (sched_pressure == SCHED_PRESSURE_WEIGHTED && !DEBUG_INSN_P (insn))
update_reg_and_insn_max_reg_pressure (insn);
/* Scheduling instruction should have all its dependencies resolved and
should have been removed from the ready list. */
- gcc_assert (sd_lists_empty_p (insn, SD_LIST_BACK));
+ gcc_assert (sd_lists_empty_p (insn, SD_LIST_HARD_BACK));
/* Reset debug insns invalidated by moving this insn. */
if (MAY_HAVE_DEBUG_INSNS && !DEBUG_INSN_P (insn))
rtx dbg = DEP_PRO (dep);
struct reg_use_data *use, *next;
+ if (DEP_STATUS (dep) & DEP_CANCELLED)
+ {
+ sd_iterator_next (&sd_it);
+ continue;
+ }
+
gcc_assert (DEBUG_INSN_P (dbg));
if (sched_verbose >= 6)
gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE);
QUEUE_INDEX (insn) = QUEUE_SCHEDULED;
+ if (sched_pressure == SCHED_PRESSURE_MODEL
+ && model_curr_point < model_num_insns
+ && NONDEBUG_INSN_P (insn))
+ {
+ if (model_index (insn) == model_curr_point)
+ do
+ model_curr_point++;
+ while (model_curr_point < model_num_insns
+ && (QUEUE_INDEX (MODEL_INSN (model_curr_point))
+ == QUEUE_SCHEDULED));
+ else
+ model_recompute (insn);
+ model_update_limit_points ();
+ update_register_pressure (insn);
+ if (sched_verbose >= 2)
+ print_curr_reg_pressure ();
+ }
+
gcc_assert (INSN_TICK (insn) >= MIN_TICK);
if (INSN_TICK (insn) > clock_var)
/* INSN has been prematurely moved from the queue to the ready list.
INSN_TICK untouched. This is a machine-dependent issue, actually. */
INSN_TICK (insn) = clock_var;
+ check_clobbered_conditions (insn);
+
/* Update dependent instructions. */
for (sd_it = sd_iterator_start (insn, SD_LIST_FORW);
sd_iterator_cond (&sd_it, &dep);)
{
rtx next = DEP_CON (dep);
+ bool cancelled = (DEP_STATUS (dep) & DEP_CANCELLED) != 0;
/* Resolve the dependence between INSN and NEXT.
sd_resolve_dep () moves current dep to another list thus
advancing the iterator. */
sd_resolve_dep (sd_it);
+ if (cancelled)
+ {
+ if (QUEUE_INDEX (next) != QUEUE_SCHEDULED)
+ {
+ int tick = INSN_TICK (next);
+ gcc_assert (ORIG_PAT (next) != NULL_RTX);
+ haifa_change_pattern (next, ORIG_PAT (next));
+ INSN_TICK (next) = tick;
+ if (sd_lists_empty_p (next, SD_LIST_BACK))
+ TODO_SPEC (next) = 0;
+ else if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK))
+ TODO_SPEC (next) = HARD_DEP;
+ }
+ continue;
+ }
+
/* Don't bother trying to mark next as ready if insn is a debug
insn. If insn is the last hard dependency, it will have
already been discounted. */
}
}
-/* Make a copy of the INSN_LIST list LINK and return it. */
-static rtx
-copy_insn_list (rtx link)
-{
- rtx new_queue;
- rtx *pqueue = &new_queue;
-
- for (; link; link = XEXP (link, 1))
- {
- rtx x = XEXP (link, 0);
- rtx newlink = alloc_INSN_LIST (x, NULL);
- *pqueue = newlink;
- pqueue = &XEXP (newlink, 1);
- }
- *pqueue = NULL_RTX;
- return new_queue;
-}
-
/* Save the current scheduler state so that we can backtrack to it
later if necessary. PAIR gives the insns that make it necessary to
save this point. SCHED_BLOCK is the local state of schedule_block
for (i = 0; i <= max_insn_queue_index; i++)
{
int q = NEXT_Q_AFTER (q_ptr, i);
- save->insn_queue[i] = copy_insn_list (insn_queue[q]);
+ save->insn_queue[i] = copy_INSN_LIST (insn_queue[q]);
}
save->clock_var = clock_var;
mark_backtrack_feeds (pair->i2, 1);
INSN_TICK (pair->i2) = INVALID_TICK;
INSN_EXACT_TICK (pair->i2) = clock_var + pair_delay (pair);
- SHADOW_P (pair->i2) = true;
+ SHADOW_P (pair->i2) = pair->stages == 0;
pair = pair->next_same_i1;
}
}
+/* Walk the ready list and all queues. If any insns have unresolved backwards
+ dependencies, these must be cancelled deps, broken by predication. Set or
+ clear (depending on SET) the DEP_CANCELLED bit in DEP_STATUS. */
+
+static void
+toggle_cancelled_flags (bool set)
+{
+ int i;
+ sd_iterator_def sd_it;
+ dep_t dep;
+
+ if (ready.n_ready > 0)
+ {
+ rtx *first = ready_lastpos (&ready);
+ for (i = 0; i < ready.n_ready; i++)
+ FOR_EACH_DEP (first[i], SD_LIST_BACK, sd_it, dep)
+ if (!DEBUG_INSN_P (DEP_PRO (dep)))
+ {
+ if (set)
+ DEP_STATUS (dep) |= DEP_CANCELLED;
+ else
+ DEP_STATUS (dep) &= ~DEP_CANCELLED;
+ }
+ }
+ for (i = 0; i <= max_insn_queue_index; i++)
+ {
+ int q = NEXT_Q_AFTER (q_ptr, i);
+ rtx link;
+ for (link = insn_queue[q]; link; link = XEXP (link, 1))
+ {
+ rtx insn = XEXP (link, 0);
+ FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep)
+ if (!DEBUG_INSN_P (DEP_PRO (dep)))
+ {
+ if (set)
+ DEP_STATUS (dep) |= DEP_CANCELLED;
+ else
+ DEP_STATUS (dep) &= ~DEP_CANCELLED;
+ }
+ }
+ }
+}
+
/* Pop entries from the SCHEDULED_INSNS vector up to and including INSN.
Restore their dependencies to an unresolved state, and mark them as
queued nowhere. */
static void
unschedule_insns_until (rtx insn)
{
+ VEC (rtx, heap) *recompute_vec;
+
+ recompute_vec = VEC_alloc (rtx, heap, 0);
+
+ /* Make two passes over the insns to be unscheduled. First, we clear out
+ dependencies and other trivial bookkeeping. */
for (;;)
{
rtx last;
if (last != insn)
INSN_TICK (last) = INVALID_TICK;
+ if (modulo_ii > 0 && INSN_UID (last) < modulo_iter0_max_uid)
+ modulo_insns_scheduled--;
+
for (sd_it = sd_iterator_start (last, SD_LIST_RES_FORW);
sd_iterator_cond (&sd_it, &dep);)
{
rtx con = DEP_CON (dep);
- TODO_SPEC (con) |= HARD_DEP;
- INSN_TICK (con) = INVALID_TICK;
sd_unresolve_dep (sd_it);
+ if (!MUST_RECOMPUTE_SPEC_P (con))
+ {
+ MUST_RECOMPUTE_SPEC_P (con) = 1;
+ VEC_safe_push (rtx, heap, recompute_vec, con);
+ }
}
if (last == insn)
break;
}
+
+ /* A second pass, to update ready and speculation status for insns
+ depending on the unscheduled ones. The first pass must have
+ popped the scheduled_insns vector up to the point where we
+ restart scheduling, as recompute_todo_spec requires it to be
+ up-to-date. */
+ while (!VEC_empty (rtx, recompute_vec))
+ {
+ rtx con;
+
+ con = VEC_pop (rtx, recompute_vec);
+ MUST_RECOMPUTE_SPEC_P (con) = 0;
+ if (!sd_lists_empty_p (con, SD_LIST_HARD_BACK))
+ {
+ TODO_SPEC (con) = HARD_DEP;
+ INSN_TICK (con) = INVALID_TICK;
+ if (PREDICATED_PAT (con) != NULL_RTX)
+ haifa_change_pattern (con, ORIG_PAT (con));
+ }
+ else if (QUEUE_INDEX (con) != QUEUE_SCHEDULED)
+ TODO_SPEC (con) = recompute_todo_spec (con);
+ }
+ VEC_free (rtx, heap, recompute_vec);
}
/* Restore scheduler state from the topmost entry on the backtracking queue.
static void
restore_last_backtrack_point (struct sched_block_state *psched_block)
-
{
rtx link;
int i;
rtx *first = ready_lastpos (&ready);
for (i = 0; i < ready.n_ready; i++)
{
- QUEUE_INDEX (first[i]) = QUEUE_NOWHERE;
- INSN_TICK (first[i]) = INVALID_TICK;
+ rtx insn = first[i];
+ QUEUE_INDEX (insn) = QUEUE_NOWHERE;
+ INSN_TICK (insn) = INVALID_TICK;
}
}
for (i = 0; i <= max_insn_queue_index; i++)
rtx *first = ready_lastpos (&ready);
for (i = 0; i < ready.n_ready; i++)
{
- QUEUE_INDEX (first[i]) = QUEUE_READY;
- INSN_TICK (first[i]) = save->clock_var;
+ rtx insn = first[i];
+ QUEUE_INDEX (insn) = QUEUE_READY;
+ TODO_SPEC (insn) = recompute_todo_spec (insn);
+ INSN_TICK (insn) = save->clock_var;
}
}
{
rtx x = XEXP (link, 0);
QUEUE_INDEX (x) = i;
+ TODO_SPEC (x) = recompute_todo_spec (x);
INSN_TICK (x) = save->clock_var + i;
}
}
free (save->insn_queue);
+ toggle_cancelled_flags (true);
+
clock_var = save->clock_var;
last_clock_var = save->last_clock_var;
cycle_issued_insns = save->cycle_issued_insns;
rtx pro = DEP_PRO (dep);
int t;
+ if (DEP_STATUS (dep) & DEP_CANCELLED)
+ continue;
+
if (QUEUE_INDEX (pro) == QUEUE_SCHEDULED)
gcc_assert (INSN_TICK (pro) + dep_cost (dep) <= INSN_TICK (insn));
else
return 0;
}
+/* If INSN has no unresolved backwards dependencies, add it to the schedule and
+ recursively resolve all its forward dependencies. */
+static void
+resolve_dependencies (rtx insn)
+{
+ sd_iterator_def sd_it;
+ dep_t dep;
+
+ /* Don't use sd_lists_empty_p; it ignores debug insns. */
+ if (DEPS_LIST_FIRST (INSN_HARD_BACK_DEPS (insn)) != NULL
+ || DEPS_LIST_FIRST (INSN_SPEC_BACK_DEPS (insn)) != NULL)
+ return;
+
+ if (sched_verbose >= 4)
+ fprintf (sched_dump, ";;\tquickly resolving %d\n", INSN_UID (insn));
+
+ if (QUEUE_INDEX (insn) >= 0)
+ queue_remove (insn);
+
+ VEC_safe_push (rtx, heap, scheduled_insns, insn);
+
+ /* Update dependent instructions. */
+ for (sd_it = sd_iterator_start (insn, SD_LIST_FORW);
+ sd_iterator_cond (&sd_it, &dep);)
+ {
+ rtx next = DEP_CON (dep);
+
+ if (sched_verbose >= 4)
+ fprintf (sched_dump, ";;\t\tdep %d against %d\n", INSN_UID (insn),
+ INSN_UID (next));
+
+ /* Resolve the dependence between INSN and NEXT.
+ sd_resolve_dep () moves current dep to another list thus
+ advancing the iterator. */
+ sd_resolve_dep (sd_it);
+
+ if (!IS_SPECULATION_BRANCHY_CHECK_P (insn))
+ {
+ resolve_dependencies (next);
+ }
+ else
+ /* Check always has only one forward dependence (to the first insn in
+ the recovery block), therefore, this will be executed only once. */
+ {
+ gcc_assert (sd_lists_empty_p (insn, SD_LIST_FORW));
+ }
+ }
+}
+
+
/* Return the head and tail pointers of ebb starting at BEG and ending
at END. */
void
/* If the ready list is full, delay the insn for 1 cycle.
See the comment in schedule_block for the rationale. */
if (!reload_completed
- && ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS
+ && (ready->n_ready - ready->n_debug > MAX_SCHED_READY_INSNS
+ || (sched_pressure == SCHED_PRESSURE_MODEL
+ /* Limit pressure recalculations to MAX_SCHED_READY_INSNS
+ instructions too. */
+ && model_index (insn) > (model_curr_point
+ + MAX_SCHED_READY_INSNS)))
+ && !(sched_pressure == SCHED_PRESSURE_MODEL
+ && model_curr_point < model_num_insns
+ /* Always allow the next model instruction to issue. */
+ && model_index (insn) == model_curr_point)
&& !SCHED_GROUP_P (insn)
&& insn != skip_insn)
queue_insn (insn, 1, "ready full");
fprintf (sched_dump, " %s:%d",
(*current_sched_info->print_insn) (p[i], 0),
INSN_LUID (p[i]));
- if (sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_NONE)
fprintf (sched_dump, "(cost=%d",
INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i]));
if (INSN_TICK (p[i]) > clock_var)
fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var);
- if (sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_NONE)
fprintf (sched_dump, ")");
}
fprintf (sched_dump, "\n");
issue an asm statement.
If SHADOWS_ONLY_P is true, we eliminate all real insns and only
- leave those for which SHADOW_P is true.
-
- Return the number of cycles we must
- advance to find the next ready instruction, or zero if there remain
- insns on the ready list. */
+ leave those for which SHADOW_P is true. If MODULO_EPILOGUE is true,
+ we only leave insns which have an INSN_EXACT_TICK. */
static void
prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
- bool shadows_only_p)
+ bool shadows_only_p, bool modulo_epilogue_p)
{
- int i;
+ int i, pass;
+ bool sched_group_found = false;
+ int min_cost_group = 1;
- restart:
for (i = 0; i < ready.n_ready; i++)
{
rtx insn = ready_element (&ready, i);
- int cost = 0;
- const char *reason = "resource conflict";
-
- if (shadows_only_p && !DEBUG_INSN_P (insn) && !SHADOW_P (insn))
+ if (SCHED_GROUP_P (insn))
{
- cost = 1;
- reason = "not a shadow";
- }
- else if (recog_memoized (insn) < 0)
- {
- if (!first_cycle_insn_p
- && (GET_CODE (PATTERN (insn)) == ASM_INPUT
- || asm_noperands (PATTERN (insn)) >= 0))
- cost = 1;
- reason = "asm";
+ sched_group_found = true;
+ break;
}
- else if (sched_pressure_p)
- cost = 0;
- else
+ }
+
+ /* Make two passes if there's a SCHED_GROUP_P insn; make sure to handle
+ such an insn first and note its cost, then schedule all other insns
+ for one cycle later. */
+ for (pass = sched_group_found ? 0 : 1; pass < 2; )
+ {
+ int n = ready.n_ready;
+ for (i = 0; i < n; i++)
{
- int delay_cost = 0;
+ rtx insn = ready_element (&ready, i);
+ int cost = 0;
+ const char *reason = "resource conflict";
- if (delay_htab)
+ if (DEBUG_INSN_P (insn))
+ continue;
+
+ if (sched_group_found && !SCHED_GROUP_P (insn))
{
- struct delay_pair *delay_entry;
- delay_entry
- = (struct delay_pair *)htab_find_with_hash (delay_htab, insn,
- htab_hash_pointer (insn));
- while (delay_entry && delay_cost == 0)
+ if (pass == 0)
+ continue;
+ cost = min_cost_group;
+ reason = "not in sched group";
+ }
+ else if (modulo_epilogue_p
+ && INSN_EXACT_TICK (insn) == INVALID_TICK)
+ {
+ cost = max_insn_queue_index;
+ reason = "not an epilogue insn";
+ }
+ else if (shadows_only_p && !SHADOW_P (insn))
+ {
+ cost = 1;
+ reason = "not a shadow";
+ }
+ else if (recog_memoized (insn) < 0)
+ {
+ if (!first_cycle_insn_p
+ && (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0))
+ cost = 1;
+ reason = "asm";
+ }
+ else if (sched_pressure != SCHED_PRESSURE_NONE)
+ {
+ if (sched_pressure == SCHED_PRESSURE_MODEL
+ && INSN_TICK (insn) <= clock_var)
{
- delay_cost = estimate_shadow_tick (delay_entry);
- if (delay_cost > max_insn_queue_index)
- delay_cost = max_insn_queue_index;
- delay_entry = delay_entry->next_same_i1;
+ memcpy (temp_state, curr_state, dfa_state_size);
+ if (state_transition (temp_state, insn) >= 0)
+ INSN_TICK (insn) = clock_var + 1;
}
+ cost = 0;
}
+ else
+ {
+ int delay_cost = 0;
- memcpy (temp_state, curr_state, dfa_state_size);
- cost = state_transition (temp_state, insn);
- if (cost < 0)
- cost = 0;
- else if (cost == 0)
- cost = 1;
- if (cost < delay_cost)
+ if (delay_htab)
+ {
+ struct delay_pair *delay_entry;
+ delay_entry
+ = (struct delay_pair *)htab_find_with_hash (delay_htab, insn,
+ htab_hash_pointer (insn));
+ while (delay_entry && delay_cost == 0)
+ {
+ delay_cost = estimate_shadow_tick (delay_entry);
+ if (delay_cost > max_insn_queue_index)
+ delay_cost = max_insn_queue_index;
+ delay_entry = delay_entry->next_same_i1;
+ }
+ }
+
+ memcpy (temp_state, curr_state, dfa_state_size);
+ cost = state_transition (temp_state, insn);
+ if (cost < 0)
+ cost = 0;
+ else if (cost == 0)
+ cost = 1;
+ if (cost < delay_cost)
+ {
+ cost = delay_cost;
+ reason = "shadow tick";
+ }
+ }
+ if (cost >= 1)
{
- cost = delay_cost;
- reason = "shadow tick";
+ if (SCHED_GROUP_P (insn) && cost > min_cost_group)
+ min_cost_group = cost;
+ ready_remove (&ready, i);
+ queue_insn (insn, cost, reason);
+ if (i + 1 < n)
+ break;
}
}
- if (cost >= 1)
- {
- ready_remove (&ready, i);
- queue_insn (insn, cost, reason);
- goto restart;
- }
+ if (i == n)
+ pass++;
}
}
TARGET_BB, possibly bringing insns from subsequent blocks in the same
region. */
-void
+bool
schedule_block (basic_block *target_bb)
{
int i;
+ bool success = modulo_ii == 0;
struct sched_block_state ls;
state_t temp_state = NULL; /* It is used for multipass scheduling. */
int sort_p, advance, start_clock_var;
in try_ready () (which is called through init_ready_list ()). */
(*current_sched_info->init_ready_list) ();
+ if (sched_pressure == SCHED_PRESSURE_MODEL)
+ model_start_schedule ();
+
/* The algorithm is O(n^2) in the number of ready insns at any given
time in the worst case. Before reload we are more likely to have
big lists so truncate them to a reasonable size. */
gcc_assert (VEC_length (rtx, scheduled_insns) == 0);
sort_p = TRUE;
must_backtrack = false;
+ modulo_insns_scheduled = 0;
+
+ ls.modulo_epilogue = false;
/* Loop until all the insns in BB are scheduled. */
while ((*current_sched_info->schedule_more_p) ())
}
while (advance > 0);
- if (ready.n_ready > 0)
- prune_ready_list (temp_state, true, false);
+ if (ls.modulo_epilogue)
+ {
+ int stage = clock_var / modulo_ii;
+ if (stage > modulo_last_stage * 2 + 2)
+ {
+ if (sched_verbose >= 2)
+ fprintf (sched_dump,
+ ";;\t\tmodulo scheduled succeeded at II %d\n",
+ modulo_ii);
+ success = true;
+ goto end_schedule;
+ }
+ }
+ else if (modulo_ii > 0)
+ {
+ int stage = clock_var / modulo_ii;
+ if (stage > modulo_max_stages)
+ {
+ if (sched_verbose >= 2)
+ fprintf (sched_dump,
+ ";;\t\tfailing schedule due to excessive stages\n");
+ goto end_schedule;
+ }
+ if (modulo_n_insns == modulo_insns_scheduled
+ && stage > modulo_last_stage)
+ {
+ if (sched_verbose >= 2)
+ fprintf (sched_dump,
+ ";;\t\tfound kernel after %d stages, II %d\n",
+ stage, modulo_ii);
+ ls.modulo_epilogue = true;
+ }
+ }
+
+ prune_ready_list (temp_state, true, false, ls.modulo_epilogue);
if (ready.n_ready == 0)
continue;
if (must_backtrack)
fprintf (sched_dump, ";;\tReady list (t = %3d): ",
clock_var);
debug_ready_list (&ready);
- if (sched_pressure_p)
+ if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
print_curr_reg_pressure ();
}
else
insn = ready_remove_first (&ready);
- if (sched_pressure_p && INSN_TICK (insn) > clock_var)
+ if (sched_pressure != SCHED_PRESSURE_NONE
+ && INSN_TICK (insn) > clock_var)
{
ready_add (&ready, insn, true);
advance = 1;
/* DECISION is made. */
+ if (modulo_ii > 0 && INSN_UID (insn) < modulo_iter0_max_uid)
+ {
+ modulo_insns_scheduled++;
+ modulo_last_stage = clock_var / modulo_ii;
+ }
if (TODO_SPEC (insn) & SPECULATIVE)
generate_recovery_code (insn);
{
memcpy (temp_state, curr_state, dfa_state_size);
cost = state_transition (curr_state, insn);
- if (!sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_WEIGHTED)
gcc_assert (cost < 0);
if (memcmp (temp_state, curr_state, dfa_state_size) != 0)
cycle_issued_insns++;
ls.first_cycle_insn_p = false;
if (ready.n_ready > 0)
- prune_ready_list (temp_state, false, ls.shadows_only_p);
+ prune_ready_list (temp_state, false, ls.shadows_only_p,
+ ls.modulo_epilogue);
}
do_backtrack:
break;
}
}
+ if (must_backtrack && modulo_ii > 0)
+ {
+ if (modulo_backtracks_left == 0)
+ goto end_schedule;
+ modulo_backtracks_left--;
+ }
while (must_backtrack)
{
struct haifa_saved_data *failed;
gcc_assert (failed);
failed_insn = failed->delay_pair->i1;
+ toggle_cancelled_flags (false);
unschedule_insns_until (failed_insn);
while (failed != backtrack_queue)
free_topmost_backtrack_point (true);
}
}
}
+ if (ls.modulo_epilogue)
+ success = true;
end_schedule:
+ if (modulo_ii > 0)
+ {
+ /* Once again, debug insn suckiness: they can be on the ready list
+ even if they have unresolved dependencies. To make our view
+ of the world consistent, remove such "ready" insns. */
+ restart_debug_insn_loop:
+ for (i = ready.n_ready - 1; i >= 0; i--)
+ {
+ rtx x;
+
+ x = ready_element (&ready, i);
+ if (DEPS_LIST_FIRST (INSN_HARD_BACK_DEPS (x)) != NULL
+ || DEPS_LIST_FIRST (INSN_SPEC_BACK_DEPS (x)) != NULL)
+ {
+ ready_remove (&ready, i);
+ goto restart_debug_insn_loop;
+ }
+ }
+ for (i = ready.n_ready - 1; i >= 0; i--)
+ {
+ rtx x;
+
+ x = ready_element (&ready, i);
+ resolve_dependencies (x);
+ }
+ for (i = 0; i <= max_insn_queue_index; i++)
+ {
+ rtx link;
+ while ((link = insn_queue[i]) != NULL)
+ {
+ rtx x = XEXP (link, 0);
+ insn_queue[i] = XEXP (link, 1);
+ QUEUE_INDEX (x) = QUEUE_NOWHERE;
+ free_INSN_LIST_node (link);
+ resolve_dependencies (x);
+ }
+ }
+ }
+
/* Debug info. */
if (sched_verbose)
{
debug_ready_list (&ready);
}
- if (current_sched_info->queue_must_finish_empty)
+ if (modulo_ii == 0 && current_sched_info->queue_must_finish_empty)
/* Sanity check -- queue must be empty now. Meaningless if region has
multiple bbs. */
gcc_assert (!q_size && !ready.n_ready && !ready.n_debug);
- else
+ else if (modulo_ii == 0)
{
/* We must maintain QUEUE_INDEX between blocks in region. */
for (i = ready.n_ready - 1; i >= 0; i--)
}
}
- commit_schedule (prev_head, tail, target_bb);
- if (sched_verbose)
- fprintf (sched_dump, ";; total time = %d\n", clock_var);
+ if (sched_pressure == SCHED_PRESSURE_MODEL)
+ model_end_schedule ();
+
+ if (success)
+ {
+ commit_schedule (prev_head, tail, target_bb);
+ if (sched_verbose)
+ fprintf (sched_dump, ";; total time = %d\n", clock_var);
+ }
+ else
+ last_scheduled_insn = tail;
+
+ VEC_truncate (rtx, scheduled_insns, 0);
if (!current_sched_info->queue_must_finish_empty
|| haifa_recovery_bb_recently_added_p)
current_sched_info->tail = tail;
free_backtrack_queue ();
+
+ return success;
}
\f
/* Set_priorities: compute priority of each insn in the block. */
if (targetm.sched.dispatch (NULL_RTX, IS_DISPATCH_ON))
targetm.sched.dispatch_do (NULL_RTX, DISPATCH_INIT);
- sched_pressure_p = (flag_sched_pressure && ! reload_completed
- && common_sched_info->sched_pass_id == SCHED_RGN_PASS);
+ if (flag_sched_pressure
+ && !reload_completed
+ && common_sched_info->sched_pass_id == SCHED_RGN_PASS)
+ sched_pressure = flag_sched_pressure_algorithm;
+ else
+ sched_pressure = SCHED_PRESSURE_NONE;
- if (sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_NONE)
ira_setup_eliminable_regset ();
/* Initialize SPEC_INFO. */
if (targetm.sched.init_global)
targetm.sched.init_global (sched_dump, sched_verbose, get_max_uid () + 1);
- if (sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_NONE)
{
int i, max_regno = max_reg_num ();
+ if (sched_dump != NULL)
+ /* We need info about pseudos for rtl dumps about pseudo
+ classes and costs. */
+ regstat_init_n_sets_and_refs ();
ira_set_pseudo_classes (sched_verbose ? sched_dump : NULL);
sched_regno_pressure_class
= (enum reg_class *) xmalloc (max_regno * sizeof (enum reg_class));
? ira_pressure_class_translate[REGNO_REG_CLASS (i)]
: ira_pressure_class_translate[reg_allocno_class (i)]);
curr_reg_live = BITMAP_ALLOC (NULL);
- saved_reg_live = BITMAP_ALLOC (NULL);
- region_ref_regs = BITMAP_ALLOC (NULL);
+ if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+ {
+ saved_reg_live = BITMAP_ALLOC (NULL);
+ region_ref_regs = BITMAP_ALLOC (NULL);
+ }
}
curr_state = xmalloc (dfa_state_size);
nr_begin_data = nr_begin_control = nr_be_in_data = nr_be_in_control = 0;
before_recovery = 0;
after_recovery = 0;
+
+ modulo_ii = 0;
}
/* Finish work with the data specific to the Haifa scheduler. */
sched_finish (void)
{
haifa_finish_h_i_d ();
- if (sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_NONE)
{
- free (sched_regno_pressure_class);
- BITMAP_FREE (region_ref_regs);
- BITMAP_FREE (saved_reg_live);
+ if (regstat_n_sets_and_refs != NULL)
+ regstat_free_n_sets_and_refs ();
+ if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
+ {
+ BITMAP_FREE (region_ref_regs);
+ BITMAP_FREE (saved_reg_live);
+ }
BITMAP_FREE (curr_reg_live);
+ free (sched_regno_pressure_class);
}
free (curr_state);
bitmap_clear (&processed);
}
-static int haifa_speculate_insn (rtx, ds_t, rtx *);
-
/* Check if NEXT is ready to be added to the ready or queue list.
If "yes", add it to the proper list.
Returns:
old_ts = TODO_SPEC (next);
- gcc_assert (!(old_ts & ~(SPECULATIVE | HARD_DEP))
+ gcc_assert (!(old_ts & ~(SPECULATIVE | HARD_DEP | DEP_CONTROL))
&& ((old_ts & HARD_DEP)
- || (old_ts & SPECULATIVE)));
-
- if (sd_lists_empty_p (next, SD_LIST_BACK))
- /* NEXT has all its dependencies resolved. */
- new_ts = 0;
- else
- {
- /* One of the NEXT's dependencies has been resolved.
- Recalculate NEXT's status. */
-
- if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK))
- new_ts = HARD_DEP;
- else
- /* Now we've got NEXT with speculative deps only.
- 1. Look at the deps to see what we have to do.
- 2. Check if we can do 'todo'. */
- {
- sd_iterator_def sd_it;
- dep_t dep;
- bool first_p = true;
-
- new_ts = 0;
-
- FOR_EACH_DEP (next, SD_LIST_BACK, sd_it, dep)
- {
- ds_t ds = DEP_STATUS (dep) & SPECULATIVE;
-
- if (DEBUG_INSN_P (DEP_PRO (dep))
- && !DEBUG_INSN_P (next))
- continue;
+ || (old_ts & SPECULATIVE)
+ || (old_ts & DEP_CONTROL)));
- if (first_p)
- {
- first_p = false;
-
- new_ts = ds;
- }
- else
- new_ts = ds_merge (new_ts, ds);
- }
-
- if (ds_weak (new_ts) < spec_info->data_weakness_cutoff)
- /* Too few points. */
- new_ts = HARD_DEP;
- }
- }
+ new_ts = recompute_todo_spec (next);
if (new_ts & HARD_DEP)
- gcc_assert (new_ts == HARD_DEP && new_ts == old_ts
+ gcc_assert (new_ts == old_ts
&& QUEUE_INDEX (next) == QUEUE_NOWHERE);
else if (current_sched_info->new_ready)
new_ts = current_sched_info->new_ready (next, new_ts);
int res;
rtx new_pat;
- gcc_assert (!(new_ts & ~SPECULATIVE));
+ gcc_assert ((new_ts & SPECULATIVE) && !(new_ts & ~SPECULATIVE));
res = haifa_speculate_insn (next, new_ts, &new_pat);
save it. */
ORIG_PAT (next) = PATTERN (next);
- haifa_change_pattern (next, new_pat);
+ res = haifa_change_pattern (next, new_pat);
+ gcc_assert (res);
break;
default:
/*gcc_assert (QUEUE_INDEX (next) == QUEUE_NOWHERE);*/
change_queue_index (next, QUEUE_NOWHERE);
+
return -1;
}
else if (!(new_ts & BEGIN_SPEC)
- && ORIG_PAT (next) && !IS_SPECULATION_CHECK_P (next))
+ && ORIG_PAT (next) && PREDICATED_PAT (next) == NULL_RTX
+ && !IS_SPECULATION_CHECK_P (next))
/* We should change pattern of every previously speculative
instruction - and we determine if NEXT was speculative by using
ORIG_PAT field. Except one case - speculation checks have ORIG_PAT
pat too, so skip them. */
{
- haifa_change_pattern (next, ORIG_PAT (next));
+ bool success = haifa_change_pattern (next, ORIG_PAT (next));
+ gcc_assert (success);
ORIG_PAT (next) = 0;
}
if (new_ts & BE_IN_CONTROL)
fprintf (spec_info->dump, "; in-control-spec;");
}
-
+ if (TODO_SPEC (next) & DEP_CONTROL)
+ fprintf (sched_dump, " predicated");
fprintf (sched_dump, "\n");
}
INSN_TICK (next) = tick;
delay = tick - clock_var;
- if (delay <= 0 || sched_pressure_p)
+ if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE)
delay = QUEUE_READY;
change_queue_index (next, delay);
add_jump_dependencies (insn, jump);
}
-/* Change pattern of INSN to NEW_PAT. */
-void
-sched_change_pattern (rtx insn, rtx new_pat)
+/* Change pattern of INSN to NEW_PAT. Invalidate cached haifa
+ instruction data. */
+static bool
+haifa_change_pattern (rtx insn, rtx new_pat)
{
sd_iterator_def sd_it;
dep_t dep;
int t;
t = validate_change (insn, &PATTERN (insn), new_pat, 0);
- gcc_assert (t);
+ if (!t)
+ return false;
dfa_clear_single_insn_cache (insn);
- for (sd_it = sd_iterator_start (insn, (SD_LIST_FORW | SD_LIST_BACK
- | SD_LIST_RES_BACK));
- sd_iterator_cond (&sd_it, &dep);)
+ sd_it = sd_iterator_start (insn,
+ SD_LIST_FORW | SD_LIST_BACK | SD_LIST_RES_BACK);
+ while (sd_iterator_cond (&sd_it, &dep))
{
DEP_COST (dep) = UNKNOWN_DEP_COST;
sd_iterator_next (&sd_it);
}
-}
-
-/* Change pattern of INSN to NEW_PAT. Invalidate cached haifa
- instruction data. */
-static void
-haifa_change_pattern (rtx insn, rtx new_pat)
-{
- sched_change_pattern (insn, new_pat);
/* Invalidate INSN_COST, so it'll be recalculated. */
INSN_COST (insn) = -1;
/* Invalidate INSN_TICK, so it'll be recalculated. */
INSN_TICK (insn) = INVALID_TICK;
+ return true;
}
/* -1 - can't speculate,
if (insn == jump)
break;
- if (dep_list_size (insn) == 0)
+ if (dep_list_size (insn, SD_LIST_FORW) == 0)
{
dep_def _new_dep, *new_dep = &_new_dep;
gcc_assert (!sd_lists_empty_p (jump, SD_LIST_BACK));
}
-/* Return the NOTE_INSN_BASIC_BLOCK of BB. */
-rtx
-bb_note (basic_block bb)
-{
- rtx note;
-
- note = BB_HEAD (bb);
- if (LABEL_P (note))
- note = NEXT_INSN (note);
-
- gcc_assert (NOTE_INSN_BASIC_BLOCK_P (note));
- return note;
-}
-
/* Extend data structures for logical insn UID. */
void
sched_extend_luids (void)
FOR_EACH_VEC_ELT (haifa_insn_data_def, h_i_d, i, data)
{
+ free (data->max_reg_pressure);
free (data->reg_pressure);
for (use = data->reg_use_list; use != NULL; use = next)
{
/* Extend dependency caches by one element. */
extend_dependency_caches (1, false);
}
- if (sched_pressure_p)
+ if (sched_pressure != SCHED_PRESSURE_NONE)
init_insn_reg_pressure_info (insn);
}