int *loop_outer_loop;
-#ifdef HAIFA
-/* The main output of analyze_loop_iterations is placed here */
-
-int *loop_can_insert_bct;
-
-/* For each loop, determines whether some of its inner loops has used
- count register */
+#ifdef HAVE_decrement_and_branch_on_count
+/* Records whether resource in use by inner loop. */
int *loop_used_count_register;
-
-/* loop parameters for arithmetic loops. These loops have a loop variable
- which is initialized to loop_start_value, incremented in each iteration
- by "loop_increment". At the end of the iteration the loop variable is
- compared to the loop_comparison_value (using loop_comparison_code). */
-
-rtx *loop_increment;
-rtx *loop_comparison_value;
-rtx *loop_start_value;
-enum rtx_code *loop_comparison_code;
-#endif /* HAIFA */
+#endif /* HAVE_decrement_and_branch_on_count */
/* For each loop, keep track of its unrolling factor.
Potential values:
static int consec_sets_invariant_p PROTO((rtx, int, rtx));
static rtx libcall_other_reg PROTO((rtx, rtx));
static int labels_in_range_p PROTO((rtx, int));
+static void count_one_set PROTO((rtx, rtx, varray_type, rtx *));
+
static void count_loop_regs_set PROTO((rtx, rtx, varray_type, varray_type,
int *, int));
static void note_addr_stored PROTO((rtx, rtx));
&& INSN_LUID (INSN) >= INSN_LUID (START) \
&& INSN_LUID (INSN) <= INSN_LUID (END))
-#ifdef HAIFA
-/* This is extern from unroll.c */
-extern void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
-
-/* Two main functions for implementing bct:
- first - to be called before loop unrolling, and the second - after */
#ifdef HAVE_decrement_and_branch_on_count
-static void analyze_loop_iterations PROTO((rtx, rtx));
+/* Test whether BCT applicable and safe. */
static void insert_bct PROTO((rtx, rtx));
-/* Auxiliary function that inserts the bct pattern into the loop */
+/* Auxiliary function that inserts the BCT pattern into the loop. */
static void instrument_loop_bct PROTO((rtx, rtx, rtx));
#endif /* HAVE_decrement_and_branch_on_count */
-#endif /* HAIFA */
/* Indirect_jump_in_function is computed once per function. */
int indirect_jump_in_function = 0;
loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int));
bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int));
-#ifdef HAIFA
+#ifdef HAVE_decrement_and_branch_on_count
/* Allocate for BCT optimization */
- loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int));
- bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int));
-
loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int));
bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int));
-
- loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx));
- loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
- loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
- bzero ((char *) loop_increment, max_loop_num * sizeof (rtx));
- bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx));
- bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx));
-
- loop_comparison_code
- = (enum rtx_code *) alloca (max_loop_num * sizeof (enum rtx_code));
- bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code));
-#endif /* HAIFA */
+#endif /* HAVE_decrement_and_branch_on_count */
/* Find and process each loop.
First, find them, and record them in order of their beginnings. */
We don't know its life-span, so we can't compute the benefit. */
if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
;
- /* In order to move a register, we need to have one of three cases:
- (1) it is used only in the same basic block as the set
- (2) it is not a user variable and it is not used in the
- exit test (this can cause the variable to be used
- before it is set just like a user-variable).
- (3) the set is guaranteed to be executed once the loop starts,
- and the reg is not used until after that. */
- else if (! ((! maybe_never
- && ! loop_reg_used_before_p (set, p, loop_start,
- scan_start, end))
- || (! REG_USERVAR_P (SET_DEST (set))
- && ! REG_LOOP_TEST_P (SET_DEST (set)))
- || reg_in_basic_block_p (p, SET_DEST (set))))
+ else if (/* The set is a user-variable or it is used in
+ the exit test (this can cause the variable to be
+ used before it is set just like a
+ user-variable)... */
+ (REG_USERVAR_P (SET_DEST (set))
+ || REG_LOOP_TEST_P (SET_DEST (set)))
+ /* And the set is not guaranteed to be executed one
+ the loop starts, or the value before the set is
+ needed before the set occurs... */
+ && (maybe_never
+ || loop_reg_used_before_p (set, p, loop_start,
+ scan_start, end))
+ /* And the register is used in basic blocks other
+ than the one where it is set (meaning that
+ something after this point in the loop might
+ depend on its value before the set). */
+ && !reg_in_basic_block_p (p, SET_DEST (set)))
+ /* It is unsafe to move the set. The fact that these
+ three conditions are considered in conjunction means
+ that we are assuming various conditions, such as:
+
+ o It's OK to move a set of a variable which was not
+ created by the user and is not used in an exit test
+ even if that point in the set would not be reached
+ during execution of the loop. */
;
else if ((tem = invariant_p (src))
&& (dependencies == 0
if (loop_dump_stream)
fprintf (loop_dump_stream, "savings %d ", savings);
- if (moved_once[regno])
- {
- insn_count *= 2;
-
- if (loop_dump_stream)
- fprintf (loop_dump_stream, "halved since already moved ");
- }
+ if (moved_once[regno] && loop_dump_stream)
+ fprintf (loop_dump_stream, "halved since already moved ");
/* An insn MUST be moved if we already moved something else
which is safe only if this one is moved too: that is,
if (already_moved[regno]
|| flag_move_all_movables
- || (threshold * savings * m->lifetime) >= insn_count
+ || (threshold * savings * m->lifetime) >=
+ (moved_once[regno] ? insn_count * 2 : insn_count)
|| (m->forces && m->forces->done
&& VARRAY_INT (n_times_used, m->forces->regno) == 1))
{
if (loop_num != -1)
{
-#ifdef HAIFA
+#ifdef HAVE_decrement_and_branch_on_count
LABEL_OUTSIDE_LOOP_P (x) = 1;
LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num];
-#endif /* HAIFA */
+#endif /* HAVE_decrement_and_branch_on_count */
loop_number_exit_labels[loop_num] = x;
}
}
\f
+/* Count and record any set in X which is contained in INSN. Update
+ MAY_NOT_MOVE and LAST_SET for any register set in X. */
+
+static void
+count_one_set (insn, x, may_not_move, last_set)
+ rtx insn, x;
+ varray_type may_not_move;
+ rtx *last_set;
+{
+ if (GET_CODE (x) == CLOBBER && GET_CODE (XEXP (x, 0)) == REG)
+ /* Don't move a reg that has an explicit clobber.
+ It's not worth the pain to try to do it correctly. */
+ VARRAY_CHAR (may_not_move, REGNO (XEXP (x, 0))) = 1;
+
+ if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
+ {
+ rtx dest = SET_DEST (x);
+ while (GET_CODE (dest) == SUBREG
+ || GET_CODE (dest) == ZERO_EXTRACT
+ || GET_CODE (dest) == SIGN_EXTRACT
+ || GET_CODE (dest) == STRICT_LOW_PART)
+ dest = XEXP (dest, 0);
+ if (GET_CODE (dest) == REG)
+ {
+ register int regno = REGNO (dest);
+ /* If this is the first setting of this reg
+ in current basic block, and it was set before,
+ it must be set in two basic blocks, so it cannot
+ be moved out of the loop. */
+ if (VARRAY_INT (n_times_set, regno) > 0
+ && last_set[regno] == 0)
+ VARRAY_CHAR (may_not_move, regno) = 1;
+ /* If this is not first setting in current basic block,
+ see if reg was used in between previous one and this.
+ If so, neither one can be moved. */
+ if (last_set[regno] != 0
+ && reg_used_between_p (dest, last_set[regno], insn))
+ VARRAY_CHAR (may_not_move, regno) = 1;
+ if (VARRAY_INT (n_times_set, regno) < 127)
+ ++VARRAY_INT (n_times_set, regno);
+ last_set[regno] = insn;
+ }
+ }
+}
+
/* Increment N_TIMES_SET at the index of each register
that is modified by an insn between FROM and TO.
If the value of an element of N_TIMES_SET becomes 127 or more,
find_single_use_in_loop (insn, REG_NOTES (insn), single_usage);
}
- if (GET_CODE (PATTERN (insn)) == CLOBBER
- && GET_CODE (XEXP (PATTERN (insn), 0)) == REG)
- /* Don't move a reg that has an explicit clobber.
- We might do so sometimes, but it's not worth the pain. */
- VARRAY_CHAR (may_not_move, REGNO (XEXP (PATTERN (insn), 0))) = 1;
-
if (GET_CODE (PATTERN (insn)) == SET
|| GET_CODE (PATTERN (insn)) == CLOBBER)
- {
- dest = SET_DEST (PATTERN (insn));
- while (GET_CODE (dest) == SUBREG
- || GET_CODE (dest) == ZERO_EXTRACT
- || GET_CODE (dest) == SIGN_EXTRACT
- || GET_CODE (dest) == STRICT_LOW_PART)
- dest = XEXP (dest, 0);
- if (GET_CODE (dest) == REG)
- {
- register int regno = REGNO (dest);
- /* If this is the first setting of this reg
- in current basic block, and it was set before,
- it must be set in two basic blocks, so it cannot
- be moved out of the loop. */
- if (VARRAY_INT (n_times_set, regno) > 0
- && last_set[regno] == 0)
- VARRAY_CHAR (may_not_move, regno) = 1;
- /* If this is not first setting in current basic block,
- see if reg was used in between previous one and this.
- If so, neither one can be moved. */
- if (last_set[regno] != 0
- && reg_used_between_p (dest, last_set[regno], insn))
- VARRAY_CHAR (may_not_move, regno) = 1;
- if (VARRAY_INT (n_times_set, regno) < 127)
- ++VARRAY_INT (n_times_set, regno);
- last_set[regno] = insn;
- }
- }
+ count_one_set (insn, PATTERN (insn), may_not_move, last_set);
else if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
register int i;
for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
- {
- register rtx x = XVECEXP (PATTERN (insn), 0, i);
- if (GET_CODE (x) == CLOBBER && GET_CODE (XEXP (x, 0)) == REG)
- /* Don't move a reg that has an explicit clobber.
- It's not worth the pain to try to do it correctly. */
- VARRAY_CHAR (may_not_move, REGNO (XEXP (x, 0))) = 1;
-
- if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
- {
- dest = SET_DEST (x);
- while (GET_CODE (dest) == SUBREG
- || GET_CODE (dest) == ZERO_EXTRACT
- || GET_CODE (dest) == SIGN_EXTRACT
- || GET_CODE (dest) == STRICT_LOW_PART)
- dest = XEXP (dest, 0);
- if (GET_CODE (dest) == REG)
- {
- register int regno = REGNO (dest);
- if (VARRAY_INT (n_times_set, regno) > 0
- && last_set[regno] == 0)
- VARRAY_CHAR (may_not_move, regno) = 1;
- if (last_set[regno] != 0
- && reg_used_between_p (dest, last_set[regno], insn))
- VARRAY_CHAR (may_not_move, regno) = 1;
- if (VARRAY_INT (n_times_set, regno) < 127)
- ++VARRAY_INT (n_times_set, regno);
- last_set[regno] = insn;
- }
- }
- }
+ count_one_set (insn, XVECEXP (PATTERN (insn), 0, i),
+ may_not_move, last_set);
}
}
so that "decrement and branch until zero" insn can be used. */
check_dbra_loop (loop_end, insn_count, loop_start);
-#ifdef HAIFA
- /* record loop-variables relevant for BCT optimization before unrolling
- the loop. Unrolling may update part of this information, and the
- correct data will be used for generating the BCT. */
-#ifdef HAVE_decrement_and_branch_on_count
- if (HAVE_decrement_and_branch_on_count && bct_p)
- analyze_loop_iterations (loop_start, loop_end);
-#endif
-#endif /* HAIFA */
-
/* Create reg_map to hold substitutions for replaceable giv regs. */
reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx));
bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx));
if (unroll_p)
unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1);
-#ifdef HAIFA
- /* instrument the loop with bct insn */
#ifdef HAVE_decrement_and_branch_on_count
- if (HAVE_decrement_and_branch_on_count && bct_p)
+ /* Instrument the loop with BCT insn. */
+ if (HAVE_decrement_and_branch_on_count && bct_p
+ && flag_branch_on_count_reg)
insert_bct (loop_start, loop_end);
-#endif
-#endif /* HAIFA */
+#endif /* HAVE_decrement_and_branch_on_count */
if (loop_dump_stream)
fprintf (loop_dump_stream, "\n");
return gen_rtx_PLUS (g2->mode, mult, add);
}
\f
-/* Return 1 if giv G2 can be combined with G1. This means that G2 can use
- (either directly or via an address expression) a register used to represent
- G1. Set g2->new_reg to a represtation of G1 (normally just
- g1->dest_reg). */
+/* Return an rtx, if any, that expresses giv G2 as a function of the register
+ represented by G1. This indicates that G2 should be combined with G1 and
+ that G2 can use (either directly or via an address expression) a register
+ used to represent G1. */
static rtx
combine_givs_p (g1, g2)
/* If these givs are identical, they can be combined. We use the results
of express_from because the addends are not in a canonical form, so
rtx_equal_p is a weaker test. */
- if (tem == const0_rtx)
+ if (tem == g1->dest_reg)
{
return g1->dest_reg;
}
if (initial_value == const0_rtx
/* If we have a decrement_and_branch_on_count, prefer
the NE test, since this will allow that instruction to
- be generated. */
+ be generated. Note that we must use a vanilla loop
+ reversal if the biv is used to calculate a giv or has
+ a non-counting use. */
#if ! defined (HAVE_decrement_and_branch_until_zero) && defined (HAVE_decrement_and_branch_on_count)
- && (add_val != 1 || ! vtop)
+ && (! (add_val == 1 && vtop
+ && (bl->biv_count == 0
+ || no_use_except_counting)))
#endif
&& GET_CODE (comparison_value) == CONST_INT
/* Now do postponed overflow checks on COMPARISON_VAL. */
nonneg = 1;
cmp_code = GE;
}
- else if (add_val == 1 && vtop)
+ else if (add_val == 1 && vtop
+ && (bl->biv_count == 0
+ || no_use_except_counting))
{
add_adjust = 0;
cmp_code = NE;
like Alpha that have an IEEE compliant EQ instruction, and
a non-IEEE compliant BEQ instruction. The use of CCmode is
actually artificial, simply to prevent the combination, but
- should not affect other platforms. */
+ should not affect other platforms.
+
+ However, we must allow VOIDmode comparisons to match either
+ CCmode or non-CCmode comparison, because some ports have
+ modeless comparisons inside branch patterns.
+
+ ??? This mode check should perhaps look more like the mode check
+ in simplify_comparison in combine. */
if ((GET_CODE (SET_SRC (set)) == COMPARE
|| (((code == NE
#endif
))
&& GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'))
- && ((GET_MODE_CLASS (mode) == MODE_CC)
- == (GET_MODE_CLASS (inner_mode) == MODE_CC)))
+ && (((GET_MODE_CLASS (mode) == MODE_CC)
+ == (GET_MODE_CLASS (inner_mode) == MODE_CC))
+ || mode == VOIDmode || inner_mode == VOIDmode))
x = SET_SRC (set);
else if (((code == EQ
|| (code == GE
#endif
))
&& GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'
- && ((GET_MODE_CLASS (mode) == MODE_CC)
- == (GET_MODE_CLASS (inner_mode) == MODE_CC)))
+ && (((GET_MODE_CLASS (mode) == MODE_CC)
+ == (GET_MODE_CLASS (inner_mode) == MODE_CC))
+ || mode == VOIDmode || inner_mode == VOIDmode))
+
{
/* We might have reversed a LT to get a GE here. But this wasn't
actually the comparison of data, so we don't flag that we
XEXP (comparison, 1), XEXP (comparison, 0));
}
-#ifdef HAIFA
-/* Analyze a loop in order to instrument it with the use of count register.
- loop_start and loop_end are the first and last insns of the loop.
- This function works in cooperation with insert_bct ().
- loop_can_insert_bct[loop_num] is set according to whether the optimization
- is applicable to the loop. When it is applicable, the following variables
- are also set:
- loop_start_value[loop_num]
- loop_comparison_value[loop_num]
- loop_increment[loop_num]
- loop_comparison_code[loop_num] */
-
#ifdef HAVE_decrement_and_branch_on_count
+/* Instrument loop for insertion of bct instruction. We distinguish between
+ loops with compile-time bounds and those with run-time bounds.
+ Information from loop_iterations() is used to compute compile-time bounds.
+ Run-time bounds should use loop preconditioning, but currently ignored.
+ */
+
static void
-analyze_loop_iterations (loop_start, loop_end)
- rtx loop_start, loop_end;
+insert_bct (loop_start, loop_end)
+ rtx loop_start, loop_end;
{
- rtx comparison, comparison_value;
- rtx iteration_var, initial_value, increment;
- enum rtx_code comparison_code;
-
- rtx last_loop_insn;
- rtx insn;
int i;
+ unsigned HOST_WIDE_INT n_iterations;
+ rtx insn;
- /* loop_variable mode */
- enum machine_mode original_mode;
-
- /* find the number of the loop */
- int loop_num = uid_loop_num [INSN_UID (loop_start)];
-
- /* we change our mind only when we are sure that loop will be instrumented */
- loop_can_insert_bct[loop_num] = 0;
-
- /* is the optimization suppressed. */
- if ( !flag_branch_on_count_reg )
- return;
-
- /* make sure that count-reg is not in use */
- if (loop_used_count_register[loop_num]){
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n",
- loop_num);
- return;
- }
-
- /* make sure that the function has no indirect jumps. */
- if (indirect_jump_in_function){
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n",
- loop_num);
- return;
- }
-
- /* make sure that the last loop insn is a conditional jump */
- last_loop_insn = PREV_INSN (loop_end);
- if (GET_CODE (last_loop_insn) != JUMP_INSN || !condjump_p (last_loop_insn)) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n",
- loop_num);
- return;
- }
-
- /* First find the iteration variable. If the last insn is a conditional
- branch, and the insn preceding it tests a register value, make that
- register the iteration variable. */
-
- /* We used to use prev_nonnote_insn here, but that fails because it might
- accidentally get the branch for a contained loop if the branch for this
- loop was deleted. We can only trust branches immediately before the
- loop_end. */
+ int increment_direction, compare_direction;
- comparison = get_condition_for_loop (last_loop_insn);
- /* ??? Get_condition may switch position of induction variable and
- invariant register when it canonicalizes the comparison. */
+ /* If the loop condition is <= or >=, the number of iteration
+ is 1 more than the range of the bounds of the loop. */
+ int add_iteration = 0;
- if (comparison == 0) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n",
- loop_num);
- return;
- }
+ enum machine_mode loop_var_mode = word_mode;
- comparison_code = GET_CODE (comparison);
- iteration_var = XEXP (comparison, 0);
- comparison_value = XEXP (comparison, 1);
+ int loop_num = uid_loop_num [INSN_UID (loop_start)];
- original_mode = GET_MODE (iteration_var);
- if (GET_MODE_CLASS (original_mode) != MODE_INT
- || GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n",
- loop_num);
+ /* It's impossible to instrument a competely unrolled loop. */
+ if (loop_unroll_factor [loop_num] == -1)
return;
- }
-
- /* get info about loop bounds and increment */
- iteration_info (iteration_var, &initial_value, &increment,
- loop_start, loop_end);
- /* make sure that all required loop data were found */
- if (!(initial_value && increment && comparison_value
- && invariant_p (comparison_value) && invariant_p (increment)
- && ! indirect_jump_in_function))
+ /* Make sure that the count register is not in use. */
+ if (loop_used_count_register [loop_num])
{
- if (loop_dump_stream) {
+ if (loop_dump_stream)
fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num);
- if (!(initial_value && increment && comparison_value)) {
- fprintf (loop_dump_stream, "\tbounds not available: ");
- if ( ! initial_value )
- fprintf (loop_dump_stream, "initial ");
- if ( ! increment )
- fprintf (loop_dump_stream, "increment ");
- if ( ! comparison_value )
- fprintf (loop_dump_stream, "comparison ");
- fprintf (loop_dump_stream, "\n");
- }
- if (!invariant_p (comparison_value) || !invariant_p (increment))
- fprintf (loop_dump_stream, "\tloop bounds not invariant\n");
- }
+ "insert_bct %d: BCT instrumentation failed: count register already in use\n",
+ loop_num);
return;
}
- /* make sure that the increment is constant */
- if (GET_CODE (increment) != CONST_INT) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n",
- loop_num);
- return;
- }
-
- /* make sure that the loop contains neither function call, nor jump on table.
- (the count register might be altered by the called function, and might
- be used for a branch on table). */
- for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) {
- if (GET_CODE (insn) == CALL_INSN){
+ /* Make sure that the function has no indirect jumps. */
+ if (indirect_jump_in_function)
+ {
if (loop_dump_stream)
fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n",
- loop_num);
+ "insert_bct %d: BCT instrumentation failed: indirect jump in function\n",
+ loop_num);
return;
}
- if (GET_CODE (insn) == JUMP_INSN
- && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
- || GET_CODE (PATTERN (insn)) == ADDR_VEC)){
+ /* Make sure that the last loop insn is a conditional jump. */
+ if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN
+ || ! condjump_p (PREV_INSN (loop_end))
+ || simplejump_p (PREV_INSN (loop_end)))
+ {
if (loop_dump_stream)
fprintf (loop_dump_stream,
- "analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n",
- loop_num);
+ "insert_bct %d: BCT instrumentation failed: invalid jump at loop end\n",
+ loop_num);
return;
}
- }
-
- /* At this point, we are sure that the loop can be instrumented with BCT.
- Some of the loops, however, will not be instrumented - the final decision
- is taken by insert_bct () */
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n",
- loop_num);
-
- /* mark all enclosing loops that they cannot use count register */
- /* ???: In fact, since insert_bct may decide not to instrument this loop,
- marking here may prevent instrumenting an enclosing loop that could
- actually be instrumented. But since this is rare, it is safer to mark
- here in case the order of calling (analyze/insert)_bct would be changed. */
- for (i=loop_num; i != -1; i = loop_outer_loop[i])
- loop_used_count_register[i] = 1;
-
- /* Set data structures which will be used by the instrumentation phase */
- loop_start_value[loop_num] = initial_value;
- loop_comparison_value[loop_num] = comparison_value;
- loop_increment[loop_num] = increment;
- loop_comparison_code[loop_num] = comparison_code;
- loop_can_insert_bct[loop_num] = 1;
-}
-
-
-/* instrument loop for insertion of bct instruction. We distinguish between
- loops with compile-time bounds, to those with run-time bounds. The loop
- behaviour is analized according to the following characteristics/variables:
- ; Input variables:
- ; comparison-value: the value to which the iteration counter is compared.
- ; initial-value: iteration-counter initial value.
- ; increment: iteration-counter increment.
- ; Computed variables:
- ; increment-direction: the sign of the increment.
- ; compare-direction: '1' for GT, GTE, '-1' for LT, LTE, '0' for NE.
- ; range-direction: sign (comparison-value - initial-value)
- We give up on the following cases:
- ; loop variable overflow.
- ; run-time loop bounds with comparison code NE.
- */
-
-static void
-insert_bct (loop_start, loop_end)
- rtx loop_start, loop_end;
-{
- rtx initial_value, comparison_value, increment;
- enum rtx_code comparison_code;
-
- int increment_direction, compare_direction;
- int unsigned_p = 0;
-
- /* if the loop condition is <= or >=, the number of iteration
- is 1 more than the range of the bounds of the loop */
- int add_iteration = 0;
-
- /* the only machine mode we work with - is the integer of the size that the
- machine has */
- enum machine_mode loop_var_mode = word_mode;
-
- int loop_num = uid_loop_num [INSN_UID (loop_start)];
-
- /* get loop-variables. No need to check that these are valid - already
- checked in analyze_loop_iterations (). */
- comparison_code = loop_comparison_code[loop_num];
- initial_value = loop_start_value[loop_num];
- comparison_value = loop_comparison_value[loop_num];
- increment = loop_increment[loop_num];
-
- /* check analyze_loop_iterations decision for this loop. */
- if (! loop_can_insert_bct[loop_num]){
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n",
- loop_num);
- return;
- }
-
- /* It's impossible to instrument a competely unrolled loop. */
- if (loop_unroll_factor [loop_num] == -1)
- return;
-
- /* make sure that the last loop insn is a conditional jump .
- This check is repeated from analyze_loop_iterations (),
- because unrolling might have changed that. */
- if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN
- || !condjump_p (PREV_INSN (loop_end))) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: not instrumenting BCT because of invalid branch\n");
- return;
- }
-
- /* fix increment in case loop was unrolled. */
- if (loop_unroll_factor [loop_num] > 1)
- increment = GEN_INT ( INTVAL (increment) * loop_unroll_factor [loop_num] );
-
- /* determine properties and directions of the loop */
- increment_direction = (INTVAL (increment) > 0) ? 1:-1;
- switch ( comparison_code ) {
- case LEU:
- unsigned_p = 1;
- /* fallthrough */
- case LE:
- compare_direction = 1;
- add_iteration = 1;
- break;
- case GEU:
- unsigned_p = 1;
- /* fallthrough */
- case GE:
- compare_direction = -1;
- add_iteration = 1;
- break;
- case EQ:
- /* in this case we cannot know the number of iterations */
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: %d: loop cannot be instrumented: == in condition\n",
- loop_num);
- return;
- case LTU:
- unsigned_p = 1;
- /* fallthrough */
- case LT:
- compare_direction = 1;
- break;
- case GTU:
- unsigned_p = 1;
- /* fallthrough */
- case GT:
- compare_direction = -1;
- break;
- case NE:
- compare_direction = 0;
- break;
- default:
- abort ();
- }
-
-
- /* make sure that the loop does not end by an overflow */
- if (compare_direction != increment_direction) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: %d: loop cannot be instrumented: terminated by overflow\n",
- loop_num);
- return;
- }
-
- /* try to instrument the loop. */
- /* Handle the simpler case, where the bounds are known at compile time. */
- if (GET_CODE (initial_value) == CONST_INT
- && GET_CODE (comparison_value) == CONST_INT)
+ /* Make sure that the loop does not contain a function call
+ (the count register might be altered by the called function). */
+ if (loop_has_call)
{
- int n_iterations;
- int increment_value_abs = INTVAL (increment) * increment_direction;
-
- /* check the relation between compare-val and initial-val */
- int difference = INTVAL (comparison_value) - INTVAL (initial_value);
- int range_direction = (difference > 0) ? 1 : -1;
-
- /* make sure the loop executes enough iterations to gain from BCT */
- if (difference > -3 && difference < 3) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: loop %d not BCT instrumented: too small iteration count.\n",
- loop_num);
- return;
- }
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "insert_bct %d: BCT instrumentation failed: function call in loop\n",
+ loop_num);
+ return;
+ }
- /* make sure that the loop executes at least once */
- if ((range_direction == 1 && compare_direction == -1)
- || (range_direction == -1 && compare_direction == 1))
+ /* Make sure that the loop does not jump via a table.
+ (the count register might be used to perform the branch on table). */
+ for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == JUMP_INSN
+ && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_VEC))
{
if (loop_dump_stream)
fprintf (loop_dump_stream,
- "insert_bct: loop %d: does not iterate even once. Not instrumenting.\n",
- loop_num);
+ "insert_bct %d: BCT instrumentation failed: computed branch in the loop\n",
+ loop_num);
return;
}
+ }
- /* make sure that the loop does not end by an overflow (in compile time
- bounds we must have an additional check for overflow, because here
- we also support the compare code of 'NE'. */
- if (comparison_code == NE
- && increment_direction != range_direction) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n",
- loop_num);
- return;
- }
+ /* Account for loop unrolling in instrumented iteration count. */
+ if (loop_unroll_factor [loop_num] > 1)
+ n_iterations = loop_n_iterations / loop_unroll_factor [loop_num];
+ else
+ n_iterations = loop_n_iterations;
- /* Determine the number of iterations by:
- ;
- ; compare-val - initial-val + (increment -1) + additional-iteration
- ; num_iterations = -----------------------------------------------------------------
- ; increment
- */
- difference = (range_direction > 0) ? difference : -difference;
-#if 0
- fprintf (stderr, "difference is: %d\n", difference); /* @*/
- fprintf (stderr, "increment_value_abs is: %d\n", increment_value_abs); /* @*/
- fprintf (stderr, "add_iteration is: %d\n", add_iteration); /* @*/
- fprintf (stderr, "INTVAL (comparison_value) is: %d\n", INTVAL (comparison_value)); /* @*/
- fprintf (stderr, "INTVAL (initial_value) is: %d\n", INTVAL (initial_value)); /* @*/
-#endif
+ if (n_iterations != 0 && n_iterations < 3)
+ {
+ /* Allow an enclosing outer loop to benefit if possible. */
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "insert_bct %d: Too few iterations to benefit from BCT optimization\n",
+ loop_num);
+ return;
+ }
- if (increment_value_abs == 0) {
- fprintf (stderr, "insert_bct: error: increment == 0 !!!\n");
- abort ();
- }
- n_iterations = (difference + increment_value_abs - 1 + add_iteration)
- / increment_value_abs;
+ /* Try to instrument the loop. */
-#if 0
- fprintf (stderr, "number of iterations is: %d\n", n_iterations); /* @*/
-#endif
+ /* Handle the simpler case, where the bounds are known at compile time. */
+ if (n_iterations > 0)
+ {
+ /* Mark all enclosing loops that they cannot use count register. */
+ for (i=loop_num; i != -1; i = loop_outer_loop[i])
+ loop_used_count_register[i] = 1;
instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
+ return;
+ }
- /* Done with this loop. */
+ /* Handle the more complex case, that the bounds are NOT known
+ at compile time. In this case we generate run_time calculation
+ of the number of iterations. */
+
+ if (loop_iteration_var == 0)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "insert_bct %d: BCT Runtime Instrumentation failed: no loop iteration variable found\n",
+ loop_num);
return;
}
- /* Handle the more complex case, that the bounds are NOT known at compile time. */
- /* In this case we generate run_time calculation of the number of iterations */
+ if (GET_MODE_CLASS (GET_MODE (loop_iteration_var)) != MODE_INT
+ || GET_MODE_SIZE (GET_MODE (loop_iteration_var)) != UNITS_PER_WORD)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "insert_bct %d: BCT Runtime Instrumentation failed: loop variable not integer\n",
+ loop_num);
+ return;
+ }
/* With runtime bounds, if the compare is of the form '!=' we give up */
- if (comparison_code == NE) {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: fail for loop %d: runtime bounds with != comparison\n",
- loop_num);
- return;
- }
+ if (loop_comparison_code == NE)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "insert_bct %d: BCT Runtime Instrumentation failed: runtime bounds with != comparison\n",
+ loop_num);
+ return;
+ }
+/* Use common loop preconditioning code instead. */
+#if 0
+ else
+ {
+ /* We rely on the existence of run-time guard to ensure that the
+ loop executes at least once. */
+ rtx sequence;
+ rtx iterations_num_reg;
- else {
- /* We rely on the existence of run-time guard to ensure that the
- loop executes at least once. */
- rtx sequence;
- rtx iterations_num_reg;
+ unsigned HOST_WIDE_INT increment_value_abs
+ = INTVAL (increment) * increment_direction;
- int increment_value_abs = INTVAL (increment) * increment_direction;
+ /* make sure that the increment is a power of two, otherwise (an
+ expensive) divide is needed. */
+ if (exact_log2 (increment_value_abs) == -1)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "insert_bct: not instrumenting BCT because the increment is not power of 2\n");
+ return;
+ }
- /* make sure that the increment is a power of two, otherwise (an
- expensive) divide is needed. */
- if (exact_log2 (increment_value_abs) == -1)
+ /* compute the number of iterations */
+ start_sequence ();
{
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: not instrumenting BCT because the increment is not power of 2\n");
- return;
- }
-
- /* compute the number of iterations */
- start_sequence ();
- {
- rtx temp_reg;
+ rtx temp_reg;
- /* Again, the number of iterations is calculated by:
- ;
- ; compare-val - initial-val + (increment -1) + additional-iteration
- ; num_iterations = -----------------------------------------------------------------
- ; increment
+ /* Again, the number of iterations is calculated by:
+ ;
+ ; compare-val - initial-val + (increment -1) + additional-iteration
+ ; num_iterations = -----------------------------------------------------------------
+ ; increment
*/
- /* ??? Do we have to call copy_rtx here before passing rtx to
- expand_binop? */
- if (compare_direction > 0) {
- /* <, <= :the loop variable is increasing */
- temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value,
- initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
- }
- else {
- temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value,
- comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
- }
+ /* ??? Do we have to call copy_rtx here before passing rtx to
+ expand_binop? */
+ if (compare_direction > 0)
+ {
+ /* <, <= :the loop variable is increasing */
+ temp_reg = expand_binop (loop_var_mode, sub_optab,
+ comparison_value, initial_value,
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ }
+ else
+ {
+ temp_reg = expand_binop (loop_var_mode, sub_optab,
+ initial_value, comparison_value,
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ }
- if (increment_value_abs - 1 + add_iteration != 0)
- temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
- GEN_INT (increment_value_abs - 1 + add_iteration),
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ if (increment_value_abs - 1 + add_iteration != 0)
+ temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
+ GEN_INT (increment_value_abs - 1
+ + add_iteration),
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
- if (increment_value_abs != 1)
- {
- /* ??? This will generate an expensive divide instruction for
- most targets. The original authors apparently expected this
- to be a shift, since they test for power-of-2 divisors above,
- but just naively generating a divide instruction will not give
- a shift. It happens to work for the PowerPC target because
- the rs6000.md file has a divide pattern that emits shifts.
- It will probably not work for any other target. */
- iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
- temp_reg,
- GEN_INT (increment_value_abs),
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
- }
- else
- iterations_num_reg = temp_reg;
+ if (increment_value_abs != 1)
+ {
+ /* ??? This will generate an expensive divide instruction for
+ most targets. The original authors apparently expected this
+ to be a shift, since they test for power-of-2 divisors above,
+ but just naively generating a divide instruction will not give
+ a shift. It happens to work for the PowerPC target because
+ the rs6000.md file has a divide pattern that emits shifts.
+ It will probably not work for any other target. */
+ iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
+ temp_reg,
+ GEN_INT (increment_value_abs),
+ NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ }
+ else
+ iterations_num_reg = temp_reg;
+ }
+ sequence = gen_sequence ();
+ end_sequence ();
+ emit_insn_before (sequence, loop_start);
+ instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
}
- sequence = gen_sequence ();
- end_sequence ();
- emit_insn_before (sequence, loop_start);
- instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
- }
+
+ return;
+#endif /* Complex case */
}
-/* instrument loop by inserting a bct in it. This is done in the following way:
- 1. A new register is created and assigned the hard register number of the count
- register.
- 2. In the head of the loop the new variable is initialized by the value passed in the
- loop_num_iterations parameter.
+/* Instrument loop by inserting a bct in it as follows:
+ 1. A new counter register is created.
+ 2. In the head of the loop the new variable is initialized to the value
+ passed in the loop_num_iterations parameter.
3. At the end of the loop, comparison of the register with 0 is generated.
- The created comparison follows the pattern defined for the
- decrement_and_branch_on_count insn, so this insn will be generated in assembly
- generation phase.
- 4. The compare&branch on the old variable is deleted. So, if the loop-variable was
- not used elsewhere, it will be eliminated by data-flow analisys. */
+ The created comparison follows the pattern defined for the
+ decrement_and_branch_on_count insn, so this insn will be generated.
+ 4. The branch on the old variable are deleted. The compare must remain
+ because it might be used elsewhere. If the loop-variable or condition
+ register are used elsewhere, they will be eliminated by flow. */
static void
instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
rtx loop_start, loop_end;
rtx loop_num_iterations;
{
- rtx temp_reg1, temp_reg2;
+ rtx counter_reg;
rtx start_label;
-
rtx sequence;
- enum machine_mode loop_var_mode = word_mode;
if (HAVE_decrement_and_branch_on_count)
{
if (loop_dump_stream)
- fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
+ {
+ fputs ("instrument_bct: Inserting BCT (", loop_dump_stream);
+ if (GET_CODE (loop_num_iterations) == CONST_INT)
+ fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (loop_num_iterations));
+ else
+ fputs ("runtime", loop_dump_stream);
+ fputs (" iterations)", loop_dump_stream);
+ }
/* Discard original jump to continue loop. Original compare result
may still be live, so it cannot be discarded explicitly. */
delete_insn (PREV_INSN (loop_end));
- /* insert the label which will delimit the start of the loop */
+ /* Insert the label which will delimit the start of the loop. */
start_label = gen_label_rtx ();
emit_label_after (start_label, loop_start);
- /* insert initialization of the count register into the loop header */
+ /* Insert initialization of the count register into the loop header. */
start_sequence ();
- temp_reg1 = gen_reg_rtx (loop_var_mode);
- emit_insn (gen_move_insn (temp_reg1, loop_num_iterations));
-
- /* this will be count register */
- temp_reg2 = gen_rtx_REG (loop_var_mode, COUNT_REGISTER_REGNUM);
- /* we have to move the value to the count register from an GPR
- because rtx pointed to by loop_num_iterations could contain
- expression which cannot be moved into count register */
- emit_insn (gen_move_insn (temp_reg2, temp_reg1));
-
+ counter_reg = gen_reg_rtx (word_mode);
+ emit_insn (gen_move_insn (counter_reg, loop_num_iterations));
sequence = gen_sequence ();
end_sequence ();
emit_insn_before (sequence, loop_start);
- /* insert new comparison on the count register instead of the
+ /* Insert new comparison on the count register instead of the
old one, generating the needed BCT pattern (that will be
later recognized by assembly generation phase). */
- emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2,
+ emit_jump_insn_before (gen_decrement_and_branch_on_count (counter_reg,
start_label),
loop_end);
LABEL_NUSES (start_label)++;
}
#endif /* HAVE_decrement_and_branch_on_count */
-#endif /* HAIFA */
-
/* Scan the function and determine whether it has indirect (computed) jumps.
This is taken mostly from flow.c; similar code exists elsewhere