const char * m32r_sdata_string = M32R_SDATA_DEFAULT;
enum m32r_sdata m32r_sdata;
-/* Scheduler support */
-static int m32r_sched_odd_word_p;
-
/* Machine-specific symbol_ref flags. */
#define SYMBOL_FLAG_MODEL_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT
#define SYMBOL_REF_MODEL(X) \
static void m32r_file_start (void);
-static int m32r_adjust_cost (rtx, rtx, rtx, int);
static int m32r_adjust_priority (rtx, int);
-static void m32r_sched_init (FILE *, int, int);
-static int m32r_sched_reorder (FILE *, int, rtx *, int *, int);
-static int m32r_variable_issue (FILE *, int, rtx, int);
static int m32r_issue_rate (void);
static void m32r_encode_section_info (tree, rtx, int);
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START m32r_file_start
-#undef TARGET_SCHED_ADJUST_COST
-#define TARGET_SCHED_ADJUST_COST m32r_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY m32r_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE m32r_issue_rate
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE m32r_variable_issue
-#undef TARGET_SCHED_INIT
-#define TARGET_SCHED_INIT m32r_sched_init
-#undef TARGET_SCHED_REORDER
-#define TARGET_SCHED_REORDER m32r_sched_reorder
+/* Schedule with the DFA pipeline description (the define_automaton and
+   define_insn_reservation forms in the machine description) rather than
+   with hand-written scheduler hooks.
+   NOTE(review): hook_int_void_1 is defined outside this patch; presumably
+   it is the generic hook that always returns 1 -- confirm. */
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO m32r_encode_section_info
return addr_rtx;
}
-\f
-static int
-m32r_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link ATTRIBUTE_UNUSED,
- rtx dep_insn ATTRIBUTE_UNUSED, int cost)
-{
- return cost;
-}
-
\f
/* Return true if INSN is real instruction bearing insn. */
}
\f
-/* Initialize for scheduling a group of instructions. */
-
-static void
-m32r_sched_init (FILE * stream ATTRIBUTE_UNUSED,
- int verbose ATTRIBUTE_UNUSED,
- int max_ready ATTRIBUTE_UNUSED)
-{
- m32r_sched_odd_word_p = FALSE;
-}
-
-\f
-/* Reorder the schedulers priority list if needed */
-
-static int
-m32r_sched_reorder (FILE * stream, int verbose, rtx * ready,
- int *n_readyp, int clock ATTRIBUTE_UNUSED)
-{
- int n_ready = *n_readyp;
-
- if (TARGET_DEBUG)
- return m32r_issue_rate ();
-
- if (verbose <= 7)
- stream = (FILE *)0;
-
- if (stream)
- fprintf (stream,
- ";;\t\t::: Looking at %d insn(s) on ready list, boundary is %s word\n",
- n_ready,
- (m32r_sched_odd_word_p) ? "odd" : "even");
-
- if (n_ready > 1)
- {
- rtx * long_head = alloca (sizeof (rtx) * n_ready);
- rtx * long_tail = long_head;
- rtx * short_head = alloca (sizeof (rtx) * n_ready);
- rtx * short_tail = short_head;
- rtx * new_head = alloca (sizeof (rtx) * n_ready);
- rtx * new_tail = new_head + (n_ready - 1);
- int i;
-
- /* Loop through the instructions, classifying them as short/long. Try
- to keep 2 short together and/or 1 long. Note, the ready list is
- actually ordered backwards, so keep it in that manner. */
- for (i = n_ready-1; i >= 0; i--)
- {
- rtx insn = ready[i];
-
- if (! m32r_is_insn (insn))
- {
- /* Dump all current short/long insns just in case. */
- while (long_head != long_tail)
- *new_tail-- = *long_head++;
-
- while (short_head != short_tail)
- *new_tail-- = *short_head++;
-
- *new_tail-- = insn;
- if (stream)
- fprintf (stream,
- ";;\t\t::: Skipping non instruction %d\n",
- INSN_UID (insn));
-
- }
-
- else
- {
- if (get_attr_insn_size (insn) != INSN_SIZE_SHORT)
- *long_tail++ = insn;
-
- else
- *short_tail++ = insn;
- }
- }
-
- /* If we are on an odd word, emit a single short instruction if
- we can. */
- if (m32r_sched_odd_word_p && short_head != short_tail)
- *new_tail-- = *short_head++;
-
- /* Now dump out all of the long instructions. */
- while (long_head != long_tail)
- *new_tail-- = *long_head++;
-
- /* Now dump out all of the short instructions. */
- while (short_head != short_tail)
- *new_tail-- = *short_head++;
-
- if (new_tail + 1 != new_head)
- abort ();
-
- memcpy (ready, new_head, sizeof (rtx) * n_ready);
- if (stream)
- {
- int i;
- fprintf (stream, ";;\t\t::: New ready list: ");
- for (i = 0; i < n_ready; i++)
- {
- rtx insn = ready[i];
-
- fprintf (stream, " %d", INSN_UID (ready[i]));
-
- if (! m32r_is_insn (insn))
- fputs ("(?)", stream);
-
- else if (get_attr_insn_size (insn) != INSN_SIZE_SHORT)
- fputs ("(l)", stream);
-
- else
- fputs ("(s)", stream);
- }
-
- fprintf (stream, "\n");
- }
- }
- return m32r_issue_rate ();
-}
-
/* Indicate how many instructions can be issued at the same time.
This is sort of a lie. The m32r can issue only 1 long insn at
once, but it can issue 2 short insns. The default therefore is
{
return ((TARGET_LOW_ISSUE_RATE) ? 1 : 2);
}
-
-/* If we have a machine that can issue a variable # of instructions
- per cycle, indicate how many more instructions can be issued
- after the current one. */
-
-static int
-m32r_variable_issue (FILE * stream, int verbose, rtx insn, int how_many)
-{
- int orig_odd_word_p = m32r_sched_odd_word_p;
- int short_p = FALSE;
-
- how_many--;
- if (how_many > 0 && !TARGET_DEBUG)
- {
- if (! m32r_is_insn (insn))
- how_many++;
-
- else if (get_attr_insn_size (insn) != INSN_SIZE_SHORT)
- {
- how_many = 0;
- m32r_sched_odd_word_p = 0;
- }
- else
- {
- m32r_sched_odd_word_p = !m32r_sched_odd_word_p;
- short_p = TRUE;
- }
- }
-
- if (verbose > 7 && stream)
- fprintf (stream,
- ";;\t\t::: %s insn %d starts on an %s word, can emit %d more instruction(s)\n",
- short_p ? "short" : "long",
- INSN_UID (insn),
- orig_odd_word_p ? "odd" : "even",
- how_many);
-
- return how_many;
-}
\f
/* Cost functions. */
[(set_attr "length" "4")
(set_attr "type" "multi")])
-
-;; Whether an instruction is 16-bit or 32-bit
+;; Whether an instruction is short (16-bit) or long (32-bit).
+;; Only the types int2, load2, store2, shift2 and mul2 are short;
+;; every other type is long.
(define_attr "insn_size" "short,long"
(if_then_else (eq_attr "type" "int2,load2,store2,shift2,mul2")
(const_string "short")
(const_string "long")))
-(define_attr "debug" "no,yes"
- (const (symbol_ref "(TARGET_DEBUG != 0)")))
-
-(define_attr "opt_size" "no,yes"
- (const (symbol_ref "(optimize_size != 0)")))
-
-(define_attr "m32r" "no,yes"
- (const (symbol_ref "(TARGET_M32R != 0)")))
-
-(define_attr "m32rx" "no,yes"
- (const (symbol_ref "(TARGET_M32RX != 0)")))
-
-(define_attr "m32r2" "no,yes"
- (const (symbol_ref "(TARGET_M32R2 != 0)")))
-
-(define_attr "m32rx_pipeline" "either,s,o,long,m32r"
- (cond [(and (eq_attr "m32rx" "no")
- (eq_attr "m32r2" "no"))
- (const_string "m32r")
-
+;; The target CPU we're compiling for. TARGET_M32RX is tested first, then
+;; TARGET_M32R2; if neither is nonzero the value defaults to "m32r".
+(define_attr "cpu" "m32r,m32r2,m32rx"
+ (cond [(ne (symbol_ref "TARGET_M32RX") (const_int 0))
+ (const_string "m32rx")
+ (ne (symbol_ref "TARGET_M32R2") (const_int 0))
+ (const_string "m32r2")]
+ (const_string "m32r")))
+
+;; Defines the pipeline on which an instruction can be executed.
+;; For the M32R, a short instruction can execute on either of the two
+;; pipes. Any instruction that is not short is "long", whatever the cpu.
+;; For the remaining cpus (M32Rx, M32R2), the restrictions on short
+;; instructions are modelled by the nested cond that supplies the
+;; default value of this attribute definition.
+;; NOTE(review): "m32r" is still listed as a possible value, but no
+;; branch below yields it.
+(define_attr "m32r_pipeline" "either,s,o,long,m32r"
+ (cond [(and (eq_attr "cpu" "m32r")
+ (eq_attr "insn_size" "short"))
+ (const_string "either")
(eq_attr "insn_size" "!short")
- (const_string "long")]
-
- (cond [(eq_attr "type" "int2")
- (const_string "either")
-
- (eq_attr "type" "load2,store2,shift2,uncond_branch,branch,call")
- (const_string "o")
-
- (eq_attr "type" "mul2")
- (const_string "s")]
-
- (const_string "long"))))
+ (const_string "long")]
+ (cond [(eq_attr "type" "int2")
+ (const_string "either")
+ (eq_attr "type" "load2,store2,shift2,uncond_branch,branch,call")
+ (const_string "o")
+ (eq_attr "type" "mul2")
+ (const_string "s")]
+ (const_string "long"))))
\f
;; ::::::::::::::::::::
;; ::
-;; :: Function Units
+;; :: Pipeline description
;; ::
;; ::::::::::::::::::::
-;; On most RISC machines, there are instructions whose results are not
-;; available for a specific number of cycles. Common cases are instructions
-;; that load data from memory. On many machines, a pipeline stall will result
-;; if the data is referenced too soon after the load instruction.
-
-;; In addition, many newer microprocessors have multiple function units,
-;; usually one for integer and one for floating point, and often will incur
-;; pipeline stalls when a result that is needed is not yet ready.
-
-;; The descriptions in this section allow the specification of how much time
-;; must elapse between the execution of an instruction and the time when its
-;; result is used. It also allows specification of when the execution of an
-;; instruction will delay execution of similar instructions due to function
-;; unit conflicts.
-
-;; For the purposes of the specifications in this section, a machine is divided
-;; into "function units", each of which execute a specific class of
-;; instructions in first-in-first-out order. Function units that accept one
-;; instruction each cycle and allow a result to be used in the succeeding
-;; instruction (usually via forwarding) need not be specified. Classic RISC
-;; microprocessors will normally have a single function unit, which we can call
-;; `memory'. The newer "superscalar" processors will often have function units
-;; for floating point operations, usually at least a floating point adder and
-;; multiplier.
-
-;; Each usage of a function units by a class of insns is specified with a
-;; `define_function_unit' expression, which looks like this:
-
-;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY
-;; ISSUE-DELAY [CONFLICT-LIST])
-
-;; NAME is a string giving the name of the function unit.
-
-;; MULTIPLICITY is an integer specifying the number of identical units in the
-;; processor. If more than one unit is specified, they will be scheduled
-;; independently. Only truly independent units should be counted; a pipelined
-;; unit should be specified as a single unit. (The only common example of a
-;; machine that has multiple function units for a single instruction class that
-;; are truly independent and not pipelined are the two multiply and two
-;; increment units of the CDC 6600.)
-
-;; SIMULTANEITY specifies the maximum number of insns that can be executing in
-;; each instance of the function unit simultaneously or zero if the unit is
-;; pipelined and has no limit.
-
-;; All `define_function_unit' definitions referring to function unit NAME must
-;; have the same name and values for MULTIPLICITY and SIMULTANEITY.
-
-;; TEST is an attribute test that selects the insns we are describing in this
-;; definition. Note that an insn may use more than one function unit and a
-;; function unit may be specified in more than one `define_function_unit'.
-
-;; READY-DELAY is an integer that specifies the number of cycles after which
-;; the result of the instruction can be used without introducing any stalls.
-
-;; ISSUE-DELAY is an integer that specifies the number of cycles after the
-;; instruction matching the TEST expression begins using this unit until a
-;; subsequent instruction can begin. A cost of N indicates an N-1 cycle delay.
-;; A subsequent instruction may also be delayed if an earlier instruction has a
-;; longer READY-DELAY value. This blocking effect is computed using the
-;; SIMULTANEITY, READY-DELAY, ISSUE-DELAY, and CONFLICT-LIST terms. For a
-;; normal non-pipelined function unit, SIMULTANEITY is one, the unit is taken
-;; to block for the READY-DELAY cycles of the executing insn, and smaller
-;; values of ISSUE-DELAY are ignored.
-
-;; CONFLICT-LIST is an optional list giving detailed conflict costs for this
-;; unit. If specified, it is a list of condition test expressions to be
-;; applied to insns chosen to execute in NAME following the particular insn
-;; matching TEST that is already executing in NAME. For each insn in the list,
-;; ISSUE-DELAY specifies the conflict cost; for insns not in the list, the cost
-;; is zero. If not specified, CONFLICT-LIST defaults to all instructions that
-;; use the function unit.
-
-;; Typical uses of this vector are where a floating point function unit can
-;; pipeline either single- or double-precision operations, but not both, or
-;; where a memory unit can pipeline loads, but not stores, etc.
-
-;; As an example, consider a classic RISC machine where the result of a load
-;; instruction is not available for two cycles (a single "delay" instruction is
-;; required) and where only one load instruction can be executed
-;; simultaneously. This would be specified as:
-
-;; (define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0)
-
-;; For the case of a floating point function unit that can pipeline
-;; either single or double precision, but not both, the following could be
-;; specified:
+;; This model is based on Chapter 2, Appendix 3 and Appendix 4 of the
+;; "M32R-FPU Software Manual", Revision 1.01, plus additional information
+;; obtained from your best friend and mine, Google.
+;;
+;; The pipeline is modelled as a fetch unit and a core with a memory unit
+;; and two execution units, where "fetch" models IF and D, "memory" models
+;; MEM1 and MEM2, and the execution units model E, E1, E2, EM, and EA.
+;; Writeback and bypasses are not modelled.
+(define_automaton "m32r")
+
+;; We pretend there are two short (16-bit) instruction fetchers. The
+;; "s" short fetcher cannot be reserved unless the "o" short fetcher is
+;; reserved too. Some instructions (the long ones) reserve both the
+;; "o" (left) and "s" (right) fetchers. These fetch units are a hack to
+;; get GCC to better pack the instructions for the M32Rx processor,
+;; which has two execution pipes.
+;;
+;; In reality there is only one decoder, which can decode either two
+;; 16-bit instructions or a single 32-bit instruction.
;;
-;; (define_function_unit "fp" 1 0
-;; (eq_attr "type" "sp_fp") 4 4
-;; [(eq_attr "type" "dp_fp")])
+;; Note, "fetch" models both the IF and the D pipeline stages.
;;
-;; (define_function_unit "fp" 1 0
-;; (eq_attr "type" "dp_fp") 4 4
-;; [(eq_attr "type" "sp_fp")])
-
-;; Note: The scheduler attempts to avoid function unit conflicts and uses all
-;; the specifications in the `define_function_unit' expression. It has
-;; recently come to our attention that these specifications may not allow
-;; modeling of some of the newer "superscalar" processors that have insns using
-;; multiple pipelined units. These insns will cause a potential conflict for
-;; the second unit used during their execution and there is no way of
-;; representing that conflict. We welcome any examples of how function unit
-;; conflicts work in such processors and suggestions for their representation.
-
-;; Function units of the M32R
-;; Units that take one cycle do not need to be specified.
-
-;; (define_function_unit {name} {multiplicity} {simultaneity} {test}
-;; {ready-delay} {issue-delay} [{conflict-list}])
-
-;; Hack to get GCC to better pack the instructions.
-;; We pretend there is a separate long function unit that conflicts with
-;; both the left and right 16 bit insn slots.
-
-(define_function_unit "short" 2 2
- (and (eq_attr "m32r" "yes")
+;; The M32Rx core has two execution pipes. We name them o_E and s_E.
+;; In addition, there's a memory unit. o_IF and s_IF are the two
+;; notional short-instruction fetch slots. All five units belong to
+;; the "m32r" automaton.
+
+(define_cpu_unit "o_IF,s_IF,o_E,s_E,memory" "m32r")
+
+;; Prevent the s pipe from being reserved before the o pipe: s_IF may be
+;; reserved only if o_IF is reserved as well, and likewise s_E only if
+;; o_E is. Being the "final" variant, the check is made against the
+;; reservation state that results from issuing the instruction.
+(final_presence_set "s_IF" "o_IF")
+(final_presence_set "s_E" "o_E")
+
+;; On the M32Rx, long instructions execute on both pipes, so reserve
+;; both fetch slots and both pipes. In a reservation string, "+" means
+;; the units are reserved together in the same cycle.
+(define_reservation "long_IF" "o_IF+s_IF")
+(define_reservation "long_E" "o_E+s_E")
+
+;; ::::::::::::::::::::
+
+;; Simple instructions do 4 stages: IF D E WB. WB is not modelled.
+;; Hence, ready latency is 1.
+;;
+;; Short instructions tied to the "o" (left) pipe: the o fetch slot in
+;; the first cycle, then the o execution pipe in the next. Short loads
+;; (load2) are excluded here; they are covered by "short_load_left".
+(define_insn_reservation "short_left" 1
+ (and (eq_attr "m32r_pipeline" "o")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "!load2")))
+ "o_IF,o_E")
+
+;; Short instructions tied to the "s" (right) pipe: the s fetch slot in
+;; the first cycle, then the s execution pipe in the next.
+(define_insn_reservation "short_right" 1
+ (and (eq_attr "m32r_pipeline" "s")
(and (eq_attr "insn_size" "short")
(eq_attr "type" "!load2")))
- 1 0
- [(eq_attr "insn_size" "long")])
+ "s_IF,s_E")
-(define_function_unit "short" 2 2 ;; load delay of 1 clock for mem execution + 1 clock for WB
- (and (eq_attr "m32r" "yes")
- (eq_attr "type" "load2"))
- 3 0
- [(eq_attr "insn_size" "long")])
+;; Short non-load instructions that may use either pipe: one of the two
+;; fetch slots in the first cycle, then one of the two execution pipes
+;; in the next.
+(define_insn_reservation "short_either" 1
+ (and (eq_attr "m32r_pipeline" "either")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "!load2")))
+ "o_IF|s_IF,o_E|s_E")
-(define_function_unit "long" 1 1
- (and (eq_attr "m32r" "yes")
+;; Long instructions other than loads when the cpu is "m32r": both
+;; fetch slots, then both execution pipes. Ready latency is 1.
+(define_insn_reservation "long_m32r" 1
+ (and (eq_attr "cpu" "m32r")
(and (eq_attr "insn_size" "long")
(eq_attr "type" "!load4,load8")))
- 1 0
- [(eq_attr "insn_size" "short")])
+ "long_IF,long_E")
-(define_function_unit "long" 1 1 ;; load delay of 1 clock for mem execution + 1 clock for WB
- (and (eq_attr "m32r" "yes")
- (and (eq_attr "insn_size" "long")
- (eq_attr "type" "load4,load8")))
- 3 0
- [(eq_attr "insn_size" "short")])
-
-(define_function_unit "left" 1 1
- (and (eq_attr "m32rx_pipeline" "o,either")
- (eq_attr "type" "!load2"))
- 1 0
- [(eq_attr "insn_size" "long")])
-
-(define_function_unit "left" 1 1 ;; load delay of 1 clock for mem execution + 1 clock for WB
- (and (eq_attr "m32rx_pipeline" "o,either")
- (eq_attr "type" "load2"))
- 3 0
- [(eq_attr "insn_size" "long")])
-
-(define_function_unit "right" 1 1
- (eq_attr "m32rx_pipeline" "s,either")
- 1 0
- [(eq_attr "insn_size" "long")])
-
-(define_function_unit "long" 1 1
- (and (eq_attr "m32rx" "yes")
+;; Long instructions other than loads whose m32r_pipeline is "long";
+;; same reservation as "long_m32r" but with a ready latency of 2.
+;; NOTE(review): m32r_pipeline is "long" for every non-short insn on any
+;; cpu, so this condition also matches the insns matched by "long_m32r".
+;; Presumably the earlier definition wins for cpu "m32r" -- confirm.
+(define_insn_reservation "long_m32rx" 2
+ (and (eq_attr "m32r_pipeline" "long")
(and (eq_attr "insn_size" "long")
(eq_attr "type" "!load4,load8")))
- 2 0
- [(eq_attr "insn_size" "short")])
+ "long_IF,long_E")
+
+;; Load/store instructions do 6 stages: IF D E MEM1 MEM2 WB.
+;; MEM1 may require more than one cycle depending on locality. We
+;; optimistically assume all memory is nearby, i.e. MEM1 takes only
+;; one cycle. Hence, ready latency is 3.
+;;
+;; NOTE(review): only the load types (load2, load4, load8) are matched
+;; by the latency 3 reservations; store types pass the "!load2" and
+;; "!load4,load8" tests of the simple-instruction reservations instead.
+
+;; The M32Rx can do short load/store only on the left pipe.
+;; "memory*2" reserves the memory unit for two consecutive cycles
+;; following the execute cycle.
+(define_insn_reservation "short_load_left" 3
+ (and (eq_attr "m32r_pipeline" "o")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "load2")))
+ "o_IF,o_E,memory*2")
-(define_function_unit "long" 1 1 ;; load delay of 1 clock for mem execution + 1 clock for WB
- (and (eq_attr "m32rx" "yes")
+;; Short loads whose m32r_pipeline is "either". m32r_pipeline maps load2
+;; to "o" unless the cpu is "m32r", so this covers the plain M32R only.
+(define_insn_reservation "short_load" 3
+ (and (eq_attr "m32r_pipeline" "either")
+ (and (eq_attr "insn_size" "short")
+ (eq_attr "type" "load2")))
+ "s_IF|o_IF,s_E|o_E,memory*2")
+
+;; Long loads (load4, load8) when the cpu is "m32r": both fetch slots,
+;; then both execution pipes, then two cycles of the memory unit.
+(define_insn_reservation "long_load" 3
+ (and (eq_attr "cpu" "m32r")
(and (eq_attr "insn_size" "long")
(eq_attr "type" "load4,load8")))
- 3 0
- [(eq_attr "insn_size" "short")])
+ "long_IF,long_E,memory*2")
+
+;; Long loads (load4, load8) whose m32r_pipeline is "long". Latency and
+;; reservation are identical to those of "long_load".
+(define_insn_reservation "long_load_m32rx" 3
+ (and (eq_attr "m32r_pipeline" "long")
+ (eq_attr "type" "load4,load8"))
+ "long_IF,long_E,memory*2")
+
\f
;; Expand prologue as RTL
(define_expand "prologue"