+2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * config/arc/arc-c.c (__ARC_LPC_WIDTH__): Add builtin define.
+ * config/arc/arc.c (ARC_MAX_LOOP_LENGTH): Define.
+ (arc_conditional_register_usage): Remove ARC600 lp_count
+ exception.
+ (arc_file_start): Emit Tag_ARC_CPU_variation.
+ (arc_can_use_doloop_p): New conditions to use ZOLs.
+ (hwloop_fail): New function.
+ (hwloop_optimize): Likewise.
+ (hwloop_pattern_reg): Likewise.
+ (arc_doloop_hooks): New struct, to be used with reorg_loops.
+ (arc_reorg_loops): New function, calls reorg_loops.
+ (arc_reorg): Call arc_reorg_loops. Remove old ZOL handling.
+ (arc600_corereg_hazard): Remove ZOL checking, case handled by
+ hwloop_optimize.
+ (arc_loop_hazard): Remove function, functionality moved into
+ hwloop_optimize.
+ (arc_hazard): Remove arc_loop_hazard call.
+ (arc_adjust_insn_length): Remove ZOL handling, functionality moved
+ into hwloop_optimize.
+ (arc_label_align): Remove ZOL handling.
+ * config/arc/arc.h (LOOP_ALIGN): Changed to 0.
+ * config/arc/arc.md (doloop_begin): Remove pattern.
+ (doloop_begin_i): Likewise.
+ (doloop_end_i): Likewise.
+ (doloop_fallback): Likewise.
+ (doloop_fallback_m): Likewise.
+ (doloop_end): Reimplement expand.
+ (arc_lp): New pattern for LP instruction.
+ (loop_end): New pattern.
+ (loop_fail): Likewise.
+ (decrement_and_branch_until_zero): Likewise.
+ * config/arc/arc.opt (mlpc-width): New option.
+ * doc/invoke.texi (mlpc-width): Document option.
+
2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (arc_ifcvt): Remove use of merge_blocks call.
builtin_define_with_int_value ("__ARC_TLS_REGNO__",
arc_tp_regno);
+ builtin_define_with_int_value ("__ARC_LPC_WIDTH__", arc_lpcwidth);
+
builtin_define (TARGET_BIG_ENDIAN
? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__");
if (TARGET_BIG_ENDIAN)
#include "rtl-iter.h"
#include "alias.h"
#include "opts.h"
+#include "hw-doloop.h"
/* Which cpu we're compiling for (ARC600, ARC601, ARC700). */
static char arc_cpu_name[10] = "";
static const char *arc_cpu_string = arc_cpu_name;
+/* Maximum length, in bytes, of an instruction sequence that can form
+   a zero-overhead loop.  */
+#define ARC_MAX_LOOP_LENGTH 4095
+
/* ??? Loads can handle any constant, stores can only handle small ones. */
/* OTOH, LIMMs cost extra, so their usefulness is limited. */
#define RTX_OK_FOR_OFFSET_P(MODE, X) \
i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
reg_alloc_order [i] = i;
}
- /* For ARC600, lp_count may not be read in an instruction
- following immediately after another one setting it to a new value.
- There was some discussion on how to enforce scheduling constraints for
- processors with missing interlocks on the gcc mailing list:
- http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
- However, we can't actually use this approach, because for ARC the
- delay slot scheduling pass is active, which runs after
- machine_dependent_reorg. */
- if (TARGET_ARC600)
- CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
- else if (!TARGET_LP_WR_INTERLOCK)
- fixed_regs[LP_COUNT] = 1;
+
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (!call_used_regs[regno])
CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
/* Implement TARGET_CAN_USE_DOLOOP_P. */
static bool
-arc_can_use_doloop_p (const widest_int &iterations, const widest_int &,
+arc_can_use_doloop_p (const widest_int &,
+ const widest_int &iterations_max,
unsigned int loop_depth, bool entered_at_top)
{
- if (loop_depth > 1)
+ /* Considering limitations in the hardware, only use doloop
+ for innermost loops which must be entered from the top. */
+ if (loop_depth > 1 || !entered_at_top)
return false;
- /* Setting up the loop with two sr instructions costs 6 cycles. */
- if (TARGET_ARC700
- && !entered_at_top
- && wi::gtu_p (iterations, 0)
- && wi::leu_p (iterations, flag_pic ? 6 : 3))
+
+ /* Check for lp_count width boundary. */
+ if (arc_lpcwidth != 32
+ && (wi::gtu_p (iterations_max, ((1 << arc_lpcwidth) - 1))
+ || wi::eq_p (iterations_max, 0)))
return false;
return true;
}
-/* NULL if INSN insn is valid within a low-overhead loop.
- Otherwise return why doloop cannot be applied. */
+/* Return NULL if INSN is valid within a low-overhead loop.
+   Otherwise return a string explaining why doloop cannot be applied.  */
static const char *
arc_invalid_within_doloop (const rtx_insn *insn)
{
if (CALL_P (insn))
return "Function call in the loop.";
+
+ /* FIXME! add here all the ZOL exceptions. */
return NULL;
}
}
}
+/* A callback for the hw-doloop pass. Called when a loop we have discovered
+ turns out not to be optimizable; we have to split the loop_end pattern into
+ a subtract and a test. */
+
+static void
+hwloop_fail (hwloop_info loop)
+{
+ rtx test;
+ rtx insn = loop->loop_end;
+
+ if (TARGET_V2
+ && (loop->length && (loop->length <= ARC_MAX_LOOP_LENGTH))
+ && REG_P (loop->iter_reg))
+ {
+ /* TARGET_V2 has dbnz instructions. */
+ test = gen_dbnz (loop->iter_reg, loop->start_label);
+ insn = emit_jump_insn_before (test, loop->loop_end);
+ }
+ else if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg) == LP_COUNT))
+ {
+ /* We have the lp_count as loop iterator, try to use it. */
+ emit_insn_before (gen_loop_fail (), loop->loop_end);
+ test = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG),
+ const0_rtx);
+ test = gen_rtx_IF_THEN_ELSE (VOIDmode, test,
+ gen_rtx_LABEL_REF (Pmode, loop->start_label),
+ pc_rtx);
+ insn = emit_jump_insn_before (gen_rtx_SET (pc_rtx, test),
+ loop->loop_end);
+ }
+ else
+ {
+ emit_insn_before (gen_addsi3 (loop->iter_reg,
+ loop->iter_reg,
+ constm1_rtx),
+ loop->loop_end);
+ test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
+ insn = emit_jump_insn_before (gen_cbranchsi4 (test,
+ loop->iter_reg,
+ const0_rtx,
+ loop->start_label),
+ loop->loop_end);
+ }
+ JUMP_LABEL (insn) = loop->start_label;
+ LABEL_NUSES (loop->start_label)++;
+ delete_insn (loop->loop_end);
+}
+
+/* A callback for the hw-doloop pass.  Try to turn LOOP into a
+   zero-overhead loop; return true on success, false if the loop
+   cannot be converted.  */
+
+static bool
+hwloop_optimize (hwloop_info loop)
+{
+ int i;
+ edge entry_edge;
+ basic_block entry_bb, bb;
+ rtx iter_reg, end_label;
+ rtx_insn *insn, *seq, *entry_after, *last_insn;
+ unsigned int length;
+ bool need_fix = false;
+ rtx lp_reg = gen_rtx_REG (SImode, LP_COUNT);
+
+ if (loop->depth > 1)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is not innermost\n",
+ loop->loop_no);
+ return false;
+ }
+
+ if (!loop->incoming_dest)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has more than one entry\n",
+ loop->loop_no);
+ return false;
+ }
+
+ if (loop->incoming_dest != loop->head)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d is not entered from head\n",
+ loop->loop_no);
+ return false;
+ }
+
+ if (loop->has_call || loop->has_asm)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has invalid insn\n",
+ loop->loop_no);
+ return false;
+ }
+
+ /* Scan all the blocks to make sure they don't use iter_reg. */
+ if (loop->iter_reg_used || loop->iter_reg_used_outside)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d uses iterator\n",
+ loop->loop_no);
+ return false;
+ }
+
+ /* Check if start_label appears before doloop_end. */
+ length = 0;
+ for (insn = loop->start_label;
+ insn && insn != loop->loop_end;
+ insn = NEXT_INSN (insn))
+ length += NONDEBUG_INSN_P (insn) ? get_attr_length (insn) : 0;
+
+ if (!insn)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+ loop->loop_no);
+ return false;
+ }
+
+ loop->length = length;
+ if (loop->length > ARC_MAX_LOOP_LENGTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+ return false;
+ }
+
+ /* Check if we use a register or not. */
+ if (!REG_P (loop->iter_reg))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d iterator is MEM\n",
+ loop->loop_no);
+ return false;
+ }
+
+ /* Check if loop register is lpcount. */
+ if (REG_P (loop->iter_reg) && (REGNO (loop->iter_reg)) != LP_COUNT)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d doesn't use lp_count as loop"
+ " iterator\n",
+ loop->loop_no);
+ /* This loop doesn't use lp_count; check whether we can make
+    it do so.  */
+ if (TEST_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT)
+ /* In rare cases LP_COUNT may still be live here.  */
+ || (loop->incoming_src
+ && REGNO_REG_SET_P (df_get_live_out (loop->incoming_src),
+ LP_COUNT)))
+ return false;
+ else
+ need_fix = true;
+ }
+
+ /* Check for control like instruction as the last instruction of a
+ ZOL. */
+ bb = loop->tail;
+ last_insn = PREV_INSN (loop->loop_end);
+
+ while (1)
+ {
+ for (; last_insn != BB_HEAD (bb);
+ last_insn = PREV_INSN (last_insn))
+ if (NONDEBUG_INSN_P (last_insn))
+ break;
+
+ if (last_insn != BB_HEAD (bb))
+ break;
+
+ if (single_pred_p (bb)
+ && single_pred_edge (bb)->flags & EDGE_FALLTHRU
+ && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ {
+ bb = single_pred (bb);
+ last_insn = BB_END (bb);
+ continue;
+ }
+ else
+ {
+ last_insn = NULL;
+ break;
+ }
+ }
+
+ if (!last_insn)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has no last instruction\n",
+ loop->loop_no);
+ return false;
+ }
+
+ if ((TARGET_ARC600_FAMILY || TARGET_HS)
+ && INSN_P (last_insn)
+ && (JUMP_P (last_insn) || CALL_P (last_insn)
+ || GET_CODE (PATTERN (last_insn)) == SEQUENCE
+ || get_attr_type (last_insn) == TYPE_BRCC
+ || get_attr_type (last_insn) == TYPE_BRCC_NO_DELAY_SLOT))
+ {
+ if (loop->length + 2 > ARC_MAX_LOOP_LENGTH)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+ return false;
+ }
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has a control like last insn;"
+ "add a nop\n",
+ loop->loop_no);
+
+ last_insn = emit_insn_after (gen_nopv (), last_insn);
+ }
+
+ if (LABEL_P (last_insn))
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has a label as last insn;"
+ "add a nop\n",
+ loop->loop_no);
+ last_insn = emit_insn_after (gen_nopv (), last_insn);
+ }
+ loop->last_insn = last_insn;
+
+ /* Get the loop iteration register. */
+ iter_reg = loop->iter_reg;
+
+ gcc_assert (REG_P (iter_reg));
+
+ entry_edge = NULL;
+
+ FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
+ if (entry_edge->flags & EDGE_FALLTHRU)
+ break;
+
+ if (entry_edge == NULL)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; loop %d has no fallthru edge jumping"
+ "into the loop\n",
+ loop->loop_no);
+ return false;
+ }
+ /* The loop is good. */
+ end_label = gen_label_rtx ();
+ loop->end_label = end_label;
+
+ /* Place the zero_cost_loop_start instruction before the loop. */
+ entry_bb = entry_edge->src;
+
+ start_sequence ();
+
+ if (need_fix)
+ {
+ /* The loop uses a R-register, but the lp_count is free, thus
+ use lp_count. */
+ emit_insn (gen_movsi (lp_reg, iter_reg));
+ SET_HARD_REG_BIT (loop->regs_set_in_loop, LP_COUNT);
+ iter_reg = lp_reg;
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; fix loop %d to use lp_count\n",
+ loop->loop_no);
+ }
+ }
+
+ insn = emit_insn (gen_arc_lp (iter_reg,
+ loop->start_label,
+ loop->end_label));
+
+ seq = get_insns ();
+ end_sequence ();
+
+ entry_after = BB_END (entry_bb);
+ if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1
+ || !entry_after)
+ {
+ basic_block new_bb;
+ edge e;
+ edge_iterator ei;
+
+ emit_insn_before (seq, BB_HEAD (loop->head));
+ seq = emit_label_before (gen_label_rtx (), seq);
+ new_bb = create_basic_block (seq, insn, entry_bb);
+ FOR_EACH_EDGE (e, ei, loop->incoming)
+ {
+ if (!(e->flags & EDGE_FALLTHRU))
+ redirect_edge_and_branch_force (e, new_bb);
+ else
+ redirect_edge_succ (e, new_bb);
+ }
+
+ make_edge (new_bb, loop->head, 0);
+ }
+ else
+ {
+#if 0
+ while (DEBUG_INSN_P (entry_after)
+ || (NOTE_P (entry_after)
+ && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK
+ /* Make sure we don't split a call and its corresponding
+ CALL_ARG_LOCATION note. */
+ && NOTE_KIND (entry_after) != NOTE_INSN_CALL_ARG_LOCATION))
+ entry_after = NEXT_INSN (entry_after);
+#endif
+ entry_after = next_nonnote_insn_bb (entry_after);
+
+ gcc_assert (entry_after);
+ emit_insn_before (seq, entry_after);
+ }
+
+ delete_insn (loop->loop_end);
+ /* Insert the loop end label before the last instruction of the
+ loop. */
+ emit_label_after (end_label, loop->last_insn);
+
+ return true;
+}
+
+/* A callback for the hw-doloop pass. This function examines INSN; if
+ it is a loop_end pattern we recognize, return the reg rtx for the
+ loop counter. Otherwise, return NULL_RTX. */
+
+static rtx
+hwloop_pattern_reg (rtx_insn *insn)
+{
+ rtx reg;
+
+ if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
+ return NULL_RTX;
+
+ reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1));
+ if (!REG_P (reg))
+ return NULL_RTX;
+ return reg;
+}
+
+static struct hw_doloop_hooks arc_doloop_hooks =
+{
+ hwloop_pattern_reg,
+ hwloop_optimize,
+ hwloop_fail
+};
+
+/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
+   and tries to rewrite the RTL of these loops so that proper ARC
+   hardware loops are generated.  */
+
+static void
+arc_reorg_loops (void)
+{
+ reorg_loops (true, &arc_doloop_hooks);
+}
+
static int arc_reorg_in_progress = 0;
/* ARC's machince specific reorg function. */
long offset;
int changed;
- workaround_arc_anomaly ();
-
cfun->machine->arc_reorg_started = 1;
arc_reorg_in_progress = 1;
- /* Link up loop ends with their loop start. */
- {
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- if (GET_CODE (insn) == JUMP_INSN
- && recog_memoized (insn) == CODE_FOR_doloop_end_i)
- {
- rtx_insn *top_label
- = as_a <rtx_insn *> (XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0));
- rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
- rtx_insn *lp, *prev = prev_nonnote_insn (top_label);
- rtx_insn *lp_simple = NULL;
- rtx_insn *next = NULL;
- rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
- int seen_label = 0;
-
- for (lp = prev;
- (lp && NONJUMP_INSN_P (lp)
- && recog_memoized (lp) != CODE_FOR_doloop_begin_i);
- lp = prev_nonnote_insn (lp))
- ;
- if (!lp || !NONJUMP_INSN_P (lp)
- || dead_or_set_regno_p (lp, LP_COUNT))
- {
- HOST_WIDE_INT loop_end_id
- = INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0));
-
- for (prev = next = insn, lp = NULL ; prev || next;)
- {
- if (prev)
- {
- if (NONJUMP_INSN_P (prev)
- && recog_memoized (prev) == CODE_FOR_doloop_begin_i
- && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
- == loop_end_id))
- {
- lp = prev;
- break;
- }
- else if (LABEL_P (prev))
- seen_label = 1;
- prev = prev_nonnote_insn (prev);
- }
- if (next)
- {
- if (NONJUMP_INSN_P (next)
- && recog_memoized (next) == CODE_FOR_doloop_begin_i
- && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
- == loop_end_id))
- {
- lp = next;
- break;
- }
- next = next_nonnote_insn (next);
- }
- }
- prev = NULL;
- }
- else
- lp_simple = lp;
- if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
- {
- rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0);
- if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
- /* The loop end insn has been duplicated. That can happen
- when there is a conditional block at the very end of
- the loop. */
- goto failure;
- /* If Register allocation failed to allocate to the right
- register, There is no point into teaching reload to
- fix this up with reloads, as that would cost more
- than using an ordinary core register with the
- doloop_fallback pattern. */
- if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
- /* Likewise, if the loop setup is evidently inside the loop,
- we loose. */
- || (!lp_simple && lp != next && !seen_label))
- {
- remove_insn (lp);
- goto failure;
- }
- /* It is common that the optimizers copy the loop count from
- another register, and doloop_begin_i is stuck with the
- source of the move. Making doloop_begin_i only accept "l"
- is nonsentical, as this then makes reload evict the pseudo
- used for the loop end. The underlying cause is that the
- optimizers don't understand that the register allocation for
- doloop_begin_i should be treated as part of the loop.
- Try to work around this problem by verifying the previous
- move exists. */
- if (true_regnum (begin_cnt) != LP_COUNT)
- {
- rtx_insn *mov;
- rtx set, note;
+ compute_bb_for_insn ();
- for (mov = prev_nonnote_insn (lp); mov;
- mov = prev_nonnote_insn (mov))
- {
- if (!NONJUMP_INSN_P (mov))
- mov = 0;
- else if ((set = single_set (mov))
- && rtx_equal_p (SET_SRC (set), begin_cnt)
- && rtx_equal_p (SET_DEST (set), op0))
- break;
- }
- if (mov)
- {
- XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0;
- note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
- if (note)
- remove_note (lp, note);
- }
- else
- {
- remove_insn (lp);
- goto failure;
- }
- }
- XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
- XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
- if (next == lp)
- XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
- else if (!lp_simple)
- XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
- else if (prev != lp)
- {
- remove_insn (lp);
- add_insn_after (lp, prev, NULL);
- }
- if (!lp_simple)
- {
- XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
- = gen_rtx_LABEL_REF (Pmode, top_label);
- add_reg_note (lp, REG_LABEL_OPERAND, top_label);
- LABEL_NUSES (top_label)++;
- }
- /* We can avoid tedious loop start / end setting for empty loops
- be merely setting the loop count to its final value. */
- if (next_active_insn (top_label) == insn)
- {
- rtx lc_set
- = gen_rtx_SET (XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
- const0_rtx);
-
- rtx_insn *lc_set_insn = emit_insn_before (lc_set, insn);
- delete_insn (lp);
- delete_insn (insn);
- insn = lc_set_insn;
- }
- /* If the loop is non-empty with zero length, we can't make it
- a zero-overhead loop. That can happen for empty asms. */
- else
- {
- rtx_insn *scan;
+ df_analyze ();
- for (scan = top_label;
- (scan && scan != insn
- && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
- scan = NEXT_INSN (scan));
- if (scan == insn)
- {
- remove_insn (lp);
- goto failure;
- }
- }
- }
- else
- {
- /* Sometimes the loop optimizer makes a complete hash of the
- loop. If it were only that the loop is not entered at the
- top, we could fix this up by setting LP_START with SR .
- However, if we can't find the loop begin were it should be,
- chances are that it does not even dominate the loop, but is
- inside the loop instead. Using SR there would kill
- performance.
- We use the doloop_fallback pattern here, which executes
- in two cycles on the ARC700 when predicted correctly. */
- failure:
- if (!REG_P (op0))
- {
- rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
+ /* Doloop optimization. */
+ arc_reorg_loops ();
- emit_insn_before (gen_move_insn (op3, op0), insn);
- PATTERN (insn)
- = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
- }
- else
- XVEC (PATTERN (insn), 0)
- = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
- XVECEXP (PATTERN (insn), 0, 1));
- INSN_CODE (insn) = -1;
- }
- }
- }
+ workaround_arc_anomaly ();
/* FIXME: should anticipate ccfsm action, generate special patterns for
to-be-deleted branches that have no delay slot and have at least the
return 6;
}
- /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */
- if (TARGET_ARC700
- && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
- || from_class == WRITABLE_CORE_REGS))
- return 8;
+ /* Using lp_count as scratch reg is a VERY bad idea. */
+ if (from_class == LPCOUNT_REG)
+ return 1000;
+ if (to_class == LPCOUNT_REG)
+ return 6;
/* Force an attempt to 'mov Dy,Dx' to spill. */
if ((TARGET_ARC700 || TARGET_EM) && TARGET_DPFP
{
if (!TARGET_ARC600)
return 0;
- /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
- in front of SUCC anyway, so there will be separation between PRED and
- SUCC. */
- if (recog_memoized (succ) == CODE_FOR_doloop_end_i
- && LABEL_P (prev_nonnote_insn (succ)))
- return 0;
- if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
- return 0;
if (GET_CODE (PATTERN (pred)) == SEQUENCE)
pred = as_a <rtx_sequence *> (PATTERN (pred))->insn (1);
if (GET_CODE (PATTERN (succ)) == SEQUENCE)
return 0;
}
-/* We might have a CALL to a non-returning function before a loop end.
- ??? Although the manual says that's OK (the target is outside the
- loop, and the loop counter unused there), the assembler barfs on
- this for ARC600, so we must insert a nop before such a call too.
- For ARC700, and ARCv2 is not allowed to have the last ZOL
- instruction a jump to a location where lp_count is modified. */
-
-static bool
-arc_loop_hazard (rtx_insn *pred, rtx_insn *succ)
-{
- rtx_insn *jump = NULL;
- rtx label_rtx = NULL_RTX;
- rtx_insn *label = NULL;
- basic_block succ_bb;
-
- if (recog_memoized (succ) != CODE_FOR_doloop_end_i)
- return false;
-
- /* Phase 1: ARC600 and ARCv2HS doesn't allow any control instruction
- (i.e., jump/call) as the last instruction of a ZOL. */
- if (TARGET_ARC600 || TARGET_HS)
- if (JUMP_P (pred) || CALL_P (pred)
- || arc_asm_insn_p (PATTERN (pred))
- || GET_CODE (PATTERN (pred)) == SEQUENCE)
- return true;
-
- /* Phase 2: Any architecture, it is not allowed to have the last ZOL
- instruction a jump to a location where lp_count is modified. */
-
- /* Phase 2a: Dig for the jump instruction. */
- if (JUMP_P (pred))
- jump = pred;
- else if (GET_CODE (PATTERN (pred)) == SEQUENCE
- && JUMP_P (XVECEXP (PATTERN (pred), 0, 0)))
- jump = as_a <rtx_insn *> (XVECEXP (PATTERN (pred), 0, 0));
- else
- return false;
-
- /* Phase 2b: Make sure is not a millicode jump. */
- if ((GET_CODE (PATTERN (jump)) == PARALLEL)
- && (XVECEXP (PATTERN (jump), 0, 0) == ret_rtx))
- return false;
-
- label_rtx = JUMP_LABEL (jump);
- if (!label_rtx)
- return false;
-
- /* Phase 2c: Make sure is not a return. */
- if (ANY_RETURN_P (label_rtx))
- return false;
-
- /* Pahse 2d: Go to the target of the jump and check for aliveness of
- LP_COUNT register. */
- label = safe_as_a <rtx_insn *> (label_rtx);
- succ_bb = BLOCK_FOR_INSN (label);
- if (!succ_bb)
- {
- gcc_assert (NEXT_INSN (label));
- if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (label)))
- succ_bb = NOTE_BASIC_BLOCK (NEXT_INSN (label));
- else
- succ_bb = BLOCK_FOR_INSN (NEXT_INSN (label));
- }
-
- if (succ_bb && REGNO_REG_SET_P (df_get_live_out (succ_bb), LP_COUNT))
- return true;
-
- return false;
-}
-
/* For ARC600:
A write to a core reg greater or equal to 32 must not be immediately
followed by a use. Anticipate the length requirement to insert a nop
if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
return 0;
- if (arc_loop_hazard (pred, succ))
- return 4;
-
if (TARGET_ARC600)
return arc600_corereg_hazard (pred, succ);
if (GET_CODE (PATTERN (insn)) == SEQUENCE)
return len;
- /* It is impossible to jump to the very end of a Zero-Overhead Loop, as
- the ZOL mechanism only triggers when advancing to the end address,
- so if there's a label at the end of a ZOL, we need to insert a nop.
- The ARC600 ZOL also has extra restrictions on jumps at the end of a
- loop. */
- if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
- {
- rtx_insn *prev = prev_nonnote_insn (insn);
-
- return ((LABEL_P (prev)
- || (TARGET_ARC600
- && (JUMP_P (prev)
- || CALL_P (prev) /* Could be a noreturn call. */
- || (NONJUMP_INSN_P (prev)
- && GET_CODE (PATTERN (prev)) == SEQUENCE))))
- ? len + 4 : len);
- }
-
/* Check for return with but one preceding insn since function
start / call. */
if (TARGET_PAD_RETURN
return cfun->machine->arc_reorg_started;
}
-/* Oddly enough, sometimes we get a zero overhead loop that branch
- shortening doesn't think is a loop - observed with compile/pr24883.c
- -O3 -fomit-frame-pointer -funroll-loops. Make sure to include the
- alignment visible for branch shortening (we actually align the loop
- insn before it, but that is equivalent since the loop insn is 4 byte
- long.) */
-
int
arc_label_align (rtx_insn *label)
{
- int loop_align = LOOP_ALIGN (LABEL);
-
- if (loop_align > align_labels_log)
- {
- rtx_insn *prev = prev_nonnote_insn (label);
-
- if (prev && NONJUMP_INSN_P (prev)
- && GET_CODE (PATTERN (prev)) == PARALLEL
- && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
- return loop_align;
- }
/* Code has a minimum p2 alignment of 1, which we must restore after an
ADDR_DIFF_VEC. */
if (align_labels_log < 1)
{0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \
{0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \
{0x1c001fff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12 */ \
- {0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \
+ {0x9fffffff, 0x80000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \
{0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'W', r0-r31 */ \
/* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. As these \
registers are fixed, it does not affect the literal meaning of the \
constraints, but it makes it a superset of GENERAL_REGS, thus \
enabling some operations that would otherwise not be possible. */ \
- {0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \
- {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \
- {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \
+ {0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \
+ {0xffffffff, 0x9fffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \
+ {0xffffffff, 0x9fffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \
{0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rcd', r0-r3 */ \
{0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rsd', r0-r1 */ \
{0x9fffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'h', r0-28, r30 */ \
of a loop. */
/* On the ARC, align loops to 4 byte boundaries unless doing all-out size
optimization. */
-#define LOOP_ALIGN JUMP_ALIGN
+#define LOOP_ALIGN(X) 0
#define LABEL_ALIGN(LABEL) (arc_label_align (LABEL))
(eq_attr "annul_ret_delay_insn" "yes")
(eq_attr "cond_ret_delay_insn" "yes")])
+(define_delay (eq_attr "type" "loop_end")
+ [(eq_attr "in_delay_slot" "true")
+ (eq_attr "in_delay_slot" "true")
+ (nil)])
+
;; For ARC600, unexposing the delay sloy incurs a penalty also in the
;; non-taken case, so the only meaningful way to have an annull-true
;; filled delay slot is to conditionalize the delay slot insn.
; The iscompact attribute allows the epilogue expander to know for which
; insns it should lengthen the return insn.
(define_insn "*movqi_insn"
- [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w,???w,h, w,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc")
- (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,?Rac,i,?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
+ [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l,???w,h,w*l,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc")
+ (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,?Rac,i, ?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
"register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode)"
"@
"if (prepare_move_operands (operands, HImode)) DONE;")
(define_insn "*movhi_insn"
- [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w,???w,Rcq#q,h, w,Rcq, S, r,r, Ucm,m,???m, m,VUsc")
- (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,?Rac, i,i,?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
+ [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l,???w,Rcq#q,h,w*l,Rcq, S, r,r, Ucm,m,???m, m,VUsc")
+ (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,?Rac, i,i, ?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))]
"register_operand (operands[0], HImode)
|| register_operand (operands[1], HImode)
|| (CONSTANT_P (operands[1])
; the iscompact attribute allows the epilogue expander to know for which
; insns it should lengthen the return insn.
; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc .
-(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
- [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w,w, w, w, w, w,???w, ?w, w,Rcq#q, h, w,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc")
- (match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL,I,Crr,Clo,Chi,Cbi,?Rac,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))]
+(define_insn "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h,w*l,w*l, w, w, w, w, ???w, ?w, w,Rcq#q, h, w*l,Rcq, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m,???m, m,VUsc")
+ (match_operand:SI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1, cL, I,Crr,Clo,Chi,Cbi,?Rac*l,Cpc,Clb, ?Cal,Cal,?Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, w,!*Rzd,c,?Rac,Cm3, C32"))]
"register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode)
|| (CONSTANT_P (operands[1])
xtr, const0_rtx);
})
+;; -------------------------------------------------------------------
+;; Hardware loop
+;; -------------------------------------------------------------------
+
; operand 0 is the loop count pseudo register
-; operand 1 is the loop end pattern
-(define_expand "doloop_begin"
- [(use (match_operand 0 "register_operand" ""))
- (use (match_operand 1 "" ""))]
+; operand 1 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(parallel [(set (pc)
+ (if_then_else
+ (ne (match_operand 0 "" "")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0) (plus (match_dup 0) (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_ARC_LP)
+ (clobber (match_dup 2))])]
""
{
- /* Using the INSN_UID of the loop end pattern to identify it causes
- trouble with -fcompare-debug, so allocate a debug-independent
- id instead. We use negative numbers so that we can use the same
- slot in doloop_end_i where we later store a CODE_LABEL_NUMBER, and
- still be able to tell what kind of number this is. */
- static HOST_WIDE_INT loop_end_id = 0;
-
- rtx id = GEN_INT (--loop_end_id);
- XEXP (XVECEXP (PATTERN (operands[1]), 0, 4), 0) = id;
- emit_insn (gen_doloop_begin_i (operands[0], const0_rtx, id,
- const0_rtx, const0_rtx));
- DONE;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+ operands[2] = gen_rtx_SCRATCH (SImode);
})
-; ??? can't describe the insn properly as then the optimizers try to
-; hoist the SETs.
-;(define_insn "doloop_begin_i"
-; [(set (reg:SI LP_START) (pc))
-; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_ARC_LP))
-; (use (match_operand 0 "const_int_operand" "n"))]
-; ""
-; "lp .L__GCC__LP%0"
-;)
-
-; The operands of doloop_end_i are also read / written by arc_reorg with
-; XVECEXP (PATTERN (lp, 0, N), so if you want to change the pattern, you
-; might have to adjust arc_reorg.
-; operands 0 / 2 are supplied by the expander, 1, 3 and 4 are filled in
-; by arc_reorg. arc_reorg might also alter operand 0.
-;
-; N in XVECEXP PATTERN (lp, 0 N)
-; V rtl purpose
-; 0 unspec UNSPEC_ARC_LP identify pattern
-; 1 clobber LP_START show LP_START is set
-; 2 clobber LP_END show LP_END is set
-; 3 use operand0 loop count pseudo register
-; 4 use operand1 before arc_reorg: -id
-; after : CODE_LABEL_NUMBER of loop top label
-; 5 use operand2 INSN_UID of loop end insn
-; 6 use operand3 loop setup not at start (1 above, 2 below)
-; 7 use operand4 LABEL_REF of top label, if not
-; immediately following
-; If operand1 is still zero after arc_reorg, this is an orphaned loop
-; instruction that was not at the start of the loop.
-; There is no point is reloading this insn - then lp_count would still not
-; be available for the loop end.
-(define_insn "doloop_begin_i"
- [(unspec:SI [(pc)] UNSPEC_ARC_LP)
- (clobber (reg:SI LP_START))
- (clobber (reg:SI LP_END))
- (use (match_operand:SI 0 "register_operand" "l,l,????*X"))
- (use (match_operand 1 "const_int_operand" "n,n,C_0"))
- (use (match_operand 2 "const_int_operand" "n,n,X"))
- (use (match_operand 3 "const_int_operand" "C_0,n,X"))
- (use (match_operand 4 "const_int_operand" "C_0,X,X"))]
+(define_insn "arc_lp"
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "l")]
+ UNSPEC_ARC_LP)
+ (use (label_ref (match_operand 1 "" "")))
+ (use (label_ref (match_operand 2 "" "")))]
""
-{
- rtx_insn *scan;
- int len, size = 0;
- int n_insns = 0;
- rtx loop_start = operands[4];
-
- if (CONST_INT_P (loop_start))
- loop_start = NULL_RTX;
- /* Size implications of the alignment will be taken care of by the
- alignment inserted at the loop start. */
- if (LOOP_ALIGN (0) && INTVAL (operands[1]))
- {
- asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0));
- arc_clear_unalign ();
- }
- if (!INTVAL (operands[1]))
- return "; LITTLE LOST LOOP";
- if (loop_start && flag_pic)
- {
- /* ??? Can do better for when a scratch register
- is known. But that would require extra testing. */
- return "push_s r0\;add r0,pcl,%4@pcl\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1@pcl\;sr r0,[3]; LP_END\;pop_s r0";
- }
- /* Check if the loop end is in range to be set by the lp instruction. */
- size = INTVAL (operands[3]) < 2 ? 0 : 2048;
- for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan))
- {
- if (!INSN_P (scan))
- continue;
- if (recog_memoized (scan) == CODE_FOR_doloop_end_i
- && (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0)
- == XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)))
- break;
- len = get_attr_length (scan);
- size += len;
- }
- /* Try to verify that there are at least three instruction fetches
- between the loop setup and the first encounter of the loop end. */
- for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan))
- {
- if (!INSN_P (scan))
- continue;
- if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (scan)))
- scan = seq->insn (0);
- if (JUMP_P (scan))
- {
- if (recog_memoized (scan) != CODE_FOR_doloop_end_i)
- {
- n_insns += 2;
- if (simplejump_p (scan))
- {
- scan = as_a <rtx_insn *> (XEXP (SET_SRC (PATTERN (scan)), 0));
- continue;
- }
-
- rtx lab = JUMP_LABEL (scan);
- if (!lab)
- break;
-
- rtx_insn *next_scan
- = next_active_insn (NEXT_INSN (PREV_INSN (scan)));
- if (next_scan
- && recog_memoized (next_scan) != CODE_FOR_doloop_begin_i)
- break;
-
- /* JUMP_LABEL might be simple_return instead if an insn. */
- if (!INSN_P (lab))
- {
- n_insns++;
- break;
- }
-
- rtx_insn *next_lab = next_active_insn (as_a<rtx_insn *> (lab));
- if (next_lab
- && recog_memoized (next_lab) != CODE_FOR_doloop_begin_i)
- break;
-
- n_insns++;
- }
- break;
- }
- len = get_attr_length (scan);
- /* Size estimation of asms assumes that each line which is nonempty
- codes an insn, and that each has a long immediate. For minimum insn
- count, assume merely that a nonempty asm has at least one insn. */
- if (GET_CODE (PATTERN (scan)) == ASM_INPUT
- || asm_noperands (PATTERN (scan)) >= 0)
- n_insns += (len != 0);
- else
- n_insns += (len > 4 ? 2 : (len ? 1 : 0));
- }
- if (LOOP_ALIGN (0))
- {
- asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0));
- arc_clear_unalign ();
- }
- gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL);
- if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start)
- {
- if (flag_pic)
- {
- /* ??? Can do better for when a scratch register
- is known. But that would require extra testing. */
- arc_clear_unalign ();
- return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1@pcl\;sr r0,[3]; LP_END\;pop_s r0";
- }
- output_asm_insn ((size < 2048
- ? "lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"),
- operands);
- output_asm_insn (loop_start
- ? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START",
- operands);
- if (TARGET_ARC600 && n_insns < 1)
- output_asm_insn ("nop", operands);
- return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:";
- }
- else if (TARGET_ARC600 && n_insns < 3)
- {
- /* At least four instructions are needed between the setting of LP_COUNT
- and the loop end - but the lp instruction qualifies as one. */
- rtx_insn *prev = prev_nonnote_insn (insn);
-
- if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT))
- output_asm_insn ("nop", operands);
- }
- return "lp .L__GCC__LP%1";
-}
+ "lp\\t@%l2\\t; %0:@%l1->@%l2"
[(set_attr "type" "loop_setup")
- (set_attr_alternative "length"
-; FIXME: length is usually 4, but we need branch shortening
-; to get this right.
-; [(if_then_else (match_test "TARGET_ARC600") (const_int 16) (const_int 4))
- [(if_then_else (match_test "flag_pic") (const_int 24) (const_int 16))
- (if_then_else (match_test "flag_pic") (const_int 28) (const_int 16))
- (const_int 0)])]
- ;; ??? we should really branch shorten this insn, but then we'd
- ;; need a proper label first. N.B. the end label can not only go out
- ;; of range when it is far away, but also when it precedes the loop -
- ;; which, unfortunately, it sometimes does, when the loop "optimizer"
- ;; messes things up.
-)
-
-; operand 0 is the loop count pseudo register
-; operand 1 is the label to jump to at the top of the loop
-; Use this for the ARC600 and ARC700.
-; ??? ARC600 might want to check if the loop has few iteration and only a
-; single insn - loop setup is expensive then.
-(define_expand "doloop_end"
- [(use (match_operand 0 "register_operand" ""))
- (use (label_ref (match_operand 1 "" "")))]
- "!TARGET_ARC601"
-{
- /* We could do smaller bivs with biv widening, and wider bivs by having
- a high-word counter in an outer loop - but punt on this for now. */
- if (GET_MODE (operands[0]) != SImode)
- FAIL;
- emit_jump_insn (gen_doloop_end_i (operands[0], operands[1], const0_rtx));
- DONE;
-})
+ (set_attr "length" "4")])
-(define_insn_and_split "doloop_end_i"
+;; If, by any chance, lp_count is not usable, fall back to an 'r'
+;; register instead of going to memory.
+(define_insn "loop_end"
[(set (pc)
- (if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m")
- (const_int 1))
+ (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0")
+ (const_int 1))
(label_ref (match_operand 1 "" ""))
(pc)))
- (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))
- (use (reg:SI LP_START))
- (use (reg:SI LP_END))
- (use (match_operand 2 "const_int_operand" "n,???Cn0,???X"))
- (clobber (match_scratch:SI 3 "=X,X,&????r"))]
+ (set (match_operand:SI 0 "nonimmediate_operand" "=l!r,m")
+ (plus (match_dup 2) (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_ARC_LP)
+ (clobber (match_scratch:SI 3 "=X,&r"))]
""
- "*
-{
- rtx_insn *prev = prev_nonnote_insn (insn);
-
- /* If there is an immediately preceding label, we must output a nop,
- lest a branch to that label will fall out of the loop.
- ??? We could try to avoid this by claiming to have a delay slot if there
- is a preceding label, and outputting the delay slot insn instead, if
- present.
- Or we could have some optimization that changes the source edge to update
- the loop count and jump to the loop start instead. */
- /* For ARC600, we must also prevent jumps inside the loop and jumps where
- the loop counter value is live at the target from being directly at the
- loop end. Being sure that the loop counter is dead at the target is
- too much hair - we can't rely on data flow information at this point -
- so insert a nop for all branches.
- The ARC600 also can't read the loop counter in the last insn of a loop. */
- if (LABEL_P (prev))
- output_asm_insn (\"nop%?\", operands);
- return \"\\n.L__GCC__LP%2: ; loop end, start is %1\";
-}"
- "&& memory_operand (operands[0], SImode)"
- [(pc)]
-{
- emit_move_insn (operands[3], operands[0]);
- emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0]));
- DONE;
-}
- [(set_attr "type" "loop_end")
- (set (attr "length")
- (if_then_else (match_test "LABEL_P (prev_nonnote_insn (insn))")
- (const_int 4) (const_int 0)))]
-)
+ "\\t;%0 %1 %2"
+ [(set_attr "length" "0")
+ (set_attr "predicable" "no")
+ (set_attr "type" "loop_end")])
-; This pattern is generated by arc_reorg when there is no recognizable
-; loop start.
-(define_insn "*doloop_fallback"
- [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w")
- (const_int 1))
- (label_ref (match_operand 1 "" ""))
- (pc)))
- (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
- ; avoid fooling the loop optimizer into assuming this is a special insn.
- "reload_completed"
- "*return get_attr_length (insn) == 8
- ? \"brne.d %0,1,%1\;sub %0,%0,1\"
- : \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";"
- [(set (attr "length")
- (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256))
- (le (minus (match_dup 1) (pc)) (const_int 244)))
- (const_int 8) (const_int 12)))
- (set_attr "type" "brcc_no_delay_slot")
- (set_attr "cond" "nocond")]
-)
+;; Split pattern for the rare case when the loop counter register
+;; ends up in memory.
+(define_split
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "memory_operand")
+ (const_int 1))
+ (label_ref (match_operand 1 ""))
+ (pc)))
+ (set (match_dup 0) (plus (match_dup 0) (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_ARC_LP)
+ (clobber (match_scratch:SI 2))]
+ "memory_operand (operands[0], SImode)"
+ [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
+ (set (match_dup 0) (match_dup 2))
+ (set (reg:CC CC_REG) (compare:CC (match_dup 2) (const_int 0)))
+ (set (pc)
+ (if_then_else (ne (reg:CC CC_REG)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ "")
-; reload can't make output reloads for jump insns, so we have to do this by hand.
-(define_insn "doloop_fallback_m"
- [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r")
- (const_int 1))
- (label_ref (match_operand 1 "" ""))
- (pc)))
- (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))
- (set (match_operand:SI 2 "memory_operand" "=m")
- (plus:SI (match_dup 0) (const_int -1)))]
- ; avoid fooling the loop optimizer into assuming this is a special insn.
- "reload_completed"
- "*return get_attr_length (insn) == 12
- ? \"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\"
- : \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";"
- [(set (attr "length")
- (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252))
- (le (minus (match_dup 1) (pc)) (const_int 244)))
- (const_int 12) (const_int 16)))
- (set_attr "type" "brcc_no_delay_slot")
- (set_attr "cond" "nocond")]
-)
+(define_insn "loop_fail"
+ [(set (reg:SI LP_COUNT)
+ (plus:SI (reg:SI LP_COUNT) (const_int -1)))
+ (set (reg:CC_ZN CC_REG)
+ (compare:CC_ZN (plus:SI (reg:SI LP_COUNT) (const_int -1))
+ (const_int 0)))]
+ ""
+ "sub.f%?\\tlp_count,lp_count,1"
+ [(set_attr "iscompact" "false")
+ (set_attr "type" "compare")
+ (set_attr "cond" "set_zn")
+ (set_attr "length" "4")
+ (set_attr "predicable" "yes")])
+
+(define_insn_and_split "dbnz"
+ [(set (pc)
+ (if_then_else
+ (ne (plus:SI (match_operand:SI 0 "nonimmediate_operand" "+r!l,m")
+ (const_int -1))
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (clobber (match_scratch:SI 2 "=X,r"))]
+ "TARGET_V2"
+ "@
+ dbnz%#\\t%0,%l1
+ #"
+ "TARGET_V2 && reload_completed && memory_operand (operands[0], SImode)"
+ [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))
+ (set (reg:CC CC_REG) (compare:CC (match_dup 2) (const_int 0)))
+ (set (match_dup 0) (match_dup 2))
+ (set (pc) (if_then_else (ge (reg:CC CC_REG)
+ (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ ""
+ [(set_attr "iscompact" "false")
+ (set_attr "type" "loop_end")
+ (set_attr "length" "4,20")])
(define_expand "movmemsi"
[(match_operand:BLK 0 "" "")
mrgf-banked-regs=
Target RejectNegative Joined Var(arc_deferred_options) Defer
Specifies the number of registers replicated in second register bank on entry to fast interrupt.
+
+mlpc-width=
+Target RejectNegative Joined Enum(arc_lpc) Var(arc_lpcwidth) Init(32)
+Sets LP_COUNT register width. Possible values are 8, 16, 20, 24, 28, and 32.
+
+Enum
+Name(arc_lpc) Type(int)
+
+EnumValue
+Enum(arc_lpc) String(8) Value(8)
+
+EnumValue
+Enum(arc_lpc) String(16) Value(16)
+
+EnumValue
+Enum(arc_lpc) String(20) Value(20)
+
+EnumValue
+Enum(arc_lpc) String(24) Value(24)
+
+EnumValue
+Enum(arc_lpc) String(28) Value(28)
+
+EnumValue
+Enum(arc_lpc) String(32) Value(32)
else if (TARGET_MUL64_SET
&& (REGNO (op) == 57 || REGNO(op) == 58 || REGNO(op) == 59 ))
return 0;
+ else if (REGNO (op) == LP_COUNT)
+ return 1;
else
return dest_reg_operand (op, mode);
case SUBREG :
-mcrc -mdsp-packa -mdvbf -mlock -mmac-d16 -mmac-24 -mrtsc -mswape @gol
-mtelephony -mxy -misize -mannotate-align -marclinux -marclinux_prof @gol
-mlong-calls -mmedium-calls -msdata -mirq-ctrl-saved @gol
--mrgf-banked-regs -G @var{num} @gol
+-mrgf-banked-regs -mlpc-width=@var{width} -G @var{num} @gol
-mvolatile-cache -mtp-regno=@var{regno} @gol
-malign-call -mauto-modify-reg -mbbit-peephole -mno-brcc @gol
-mcase-vector-pcrel -mcompact-casesi -mno-cond-exec -mearly-cbranchsi @gol
sequences. Use this option when you are using fast interrupts in an
ARC V2 family processor. Permitted values are 4, 8, 16, and 32.
+@item -mlpc-width=@var{width}
+@opindex mlpc-width
+Specify the width of the @code{lp_count} register. Valid values for
+@var{width} are 8, 16, 20, 24, 28 and 32 bits. The default width is
+fixed to 32 bits. If the width is less than 32, the compiler does not
+attempt to transform loops in your program to use the zero-delay loop
+mechanism unless it is known that the @code{lp_count} register can
+hold the required loop-counter value. Depending on the width
+specified, the compiler and run-time library might continue to use the
+loop mechanism for various needs. This option defines macro
+@code{__ARC_LPC_WIDTH__} with the value of @var{width}.
+
@end table
The following options are passed through to the assembler, and also
+2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * gcc.target/arc/loop-1.c: Deleted.
+
2017-09-01 Claudiu Zissulescu <claziss@synopsys.com>
* gcc.target/arc/arc.exp: Test also cpp files.
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-options "-O2" } */
-
-/* This case would fail to make use of the zero-overhead loop
- instruction at one time due to a bug. */
-
-extern char a[];
-
-struct some_t
-{
- struct
- {
- int aaa;
- short bbb;
- char ccc;
- char ddd;
- } ppp[8];
-
- int www[1];
-};
-
-int b;
-
-void
-some_function ()
-{
- struct some_t *tmp = (struct some_t *) a;
-
- while ((*tmp).ppp[b].ccc)
- while(0);
-
- for (; b; b++)
- {
- if (tmp->ppp[b].ccc)
- {
- int c = tmp->ppp[b].bbb;
- int d = tmp->ppp[b].aaa;
- int e = d - tmp->www[c];
- if (e)
- tmp->ppp[b].ddd = 1;
- }
- }
-}
-
-/* { dg-final { scan-assembler "\[^\n\]+lp \\.L__GCC__" } } */