From: Steven Bosscher Date: Mon, 28 Jun 2004 22:39:21 +0000 (+0000) Subject: m32r.c (m32r_sched_odd_word_p, [...]): Remove. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5ad6fca5c3cfc7f8b8ee11163a6db53d80c30e96;p=gcc.git m32r.c (m32r_sched_odd_word_p, [...]): Remove. * config/m32r/m32r.c (m32r_sched_odd_word_p, m32r_adjust_cost, m32r_sched_init, m32r_sched_reorder, m32r_variable_issue): Remove. (TARGET_SCHED_ADJUST_COST, TARGET_SCHED_VARIABLE_ISSUE, TARGET_SCHED_INIT, TARGET_SCHED_REORDER): Don't define. * config/m32r/m32r.md: Rewrite the pipeline description as a DFA. From-SVN: r83829 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5bdb1ded839..37be9da035e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2004-06-28 Steven Bosscher + + * config/m32r/m32r.c (m32r_sched_odd_word_p, m32r_adjust_cost, + m32r_sched_init, m32r_sched_reorder, m32r_variable_issue): Remove. + (TARGET_SCHED_ADJUST_COST, TARGET_SCHED_VARIABLE_ISSUE, + TARGET_SCHED_INIT, TARGET_SCHED_REORDER): Don't define. + * config/m32r/m32r.md: Rewrite the pipeline description as a DFA. + 2004-06-28 Richard Henderson * tree.def (REALPART_EXPR, IMAGPART_EXPR): Change class to 'r'. diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c index 6599dc64bbe..5adc498c52f 100644 --- a/gcc/config/m32r/m32r.c +++ b/gcc/config/m32r/m32r.c @@ -57,9 +57,6 @@ enum m32r_model m32r_model; const char * m32r_sdata_string = M32R_SDATA_DEFAULT; enum m32r_sdata m32r_sdata; -/* Scheduler support */ -static int m32r_sched_odd_word_p; - /* Machine-specific symbol_ref flags. 
*/ #define SYMBOL_FLAG_MODEL_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT #define SYMBOL_REF_MODEL(X) \ @@ -92,11 +89,7 @@ static void m32r_output_function_epilogue (FILE *, HOST_WIDE_INT); static void m32r_file_start (void); -static int m32r_adjust_cost (rtx, rtx, rtx, int); static int m32r_adjust_priority (rtx, int); -static void m32r_sched_init (FILE *, int, int); -static int m32r_sched_reorder (FILE *, int, rtx *, int *, int); -static int m32r_variable_issue (FILE *, int, rtx, int); static int m32r_issue_rate (void); static void m32r_encode_section_info (tree, rtx, int); @@ -124,18 +117,12 @@ static bool m32r_rtx_costs (rtx, int, int, int *); #undef TARGET_ASM_FILE_START #define TARGET_ASM_FILE_START m32r_file_start -#undef TARGET_SCHED_ADJUST_COST -#define TARGET_SCHED_ADJUST_COST m32r_adjust_cost #undef TARGET_SCHED_ADJUST_PRIORITY #define TARGET_SCHED_ADJUST_PRIORITY m32r_adjust_priority #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE m32r_issue_rate -#undef TARGET_SCHED_VARIABLE_ISSUE -#define TARGET_SCHED_VARIABLE_ISSUE m32r_variable_issue -#undef TARGET_SCHED_INIT -#define TARGET_SCHED_INIT m32r_sched_init -#undef TARGET_SCHED_REORDER -#define TARGET_SCHED_REORDER m32r_sched_reorder +#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE +#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1 #undef TARGET_ENCODE_SECTION_INFO #define TARGET_ENCODE_SECTION_INFO m32r_encode_section_info @@ -1463,14 +1450,6 @@ m32r_va_arg (tree valist, tree type) return addr_rtx; } - -static int -m32r_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link ATTRIBUTE_UNUSED, - rtx dep_insn ATTRIBUTE_UNUSED, int cost) -{ - return cost; -} - /* Return true if INSN is real instruction bearing insn. */ @@ -1497,124 +1476,6 @@ m32r_adjust_priority (rtx insn, int priority) } -/* Initialize for scheduling a group of instructions. 
*/ - -static void -m32r_sched_init (FILE * stream ATTRIBUTE_UNUSED, - int verbose ATTRIBUTE_UNUSED, - int max_ready ATTRIBUTE_UNUSED) -{ - m32r_sched_odd_word_p = FALSE; -} - - -/* Reorder the schedulers priority list if needed */ - -static int -m32r_sched_reorder (FILE * stream, int verbose, rtx * ready, - int *n_readyp, int clock ATTRIBUTE_UNUSED) -{ - int n_ready = *n_readyp; - - if (TARGET_DEBUG) - return m32r_issue_rate (); - - if (verbose <= 7) - stream = (FILE *)0; - - if (stream) - fprintf (stream, - ";;\t\t::: Looking at %d insn(s) on ready list, boundary is %s word\n", - n_ready, - (m32r_sched_odd_word_p) ? "odd" : "even"); - - if (n_ready > 1) - { - rtx * long_head = alloca (sizeof (rtx) * n_ready); - rtx * long_tail = long_head; - rtx * short_head = alloca (sizeof (rtx) * n_ready); - rtx * short_tail = short_head; - rtx * new_head = alloca (sizeof (rtx) * n_ready); - rtx * new_tail = new_head + (n_ready - 1); - int i; - - /* Loop through the instructions, classifying them as short/long. Try - to keep 2 short together and/or 1 long. Note, the ready list is - actually ordered backwards, so keep it in that manner. */ - for (i = n_ready-1; i >= 0; i--) - { - rtx insn = ready[i]; - - if (! m32r_is_insn (insn)) - { - /* Dump all current short/long insns just in case. */ - while (long_head != long_tail) - *new_tail-- = *long_head++; - - while (short_head != short_tail) - *new_tail-- = *short_head++; - - *new_tail-- = insn; - if (stream) - fprintf (stream, - ";;\t\t::: Skipping non instruction %d\n", - INSN_UID (insn)); - - } - - else - { - if (get_attr_insn_size (insn) != INSN_SIZE_SHORT) - *long_tail++ = insn; - - else - *short_tail++ = insn; - } - } - - /* If we are on an odd word, emit a single short instruction if - we can. */ - if (m32r_sched_odd_word_p && short_head != short_tail) - *new_tail-- = *short_head++; - - /* Now dump out all of the long instructions. 
*/ - while (long_head != long_tail) - *new_tail-- = *long_head++; - - /* Now dump out all of the short instructions. */ - while (short_head != short_tail) - *new_tail-- = *short_head++; - - if (new_tail + 1 != new_head) - abort (); - - memcpy (ready, new_head, sizeof (rtx) * n_ready); - if (stream) - { - int i; - fprintf (stream, ";;\t\t::: New ready list: "); - for (i = 0; i < n_ready; i++) - { - rtx insn = ready[i]; - - fprintf (stream, " %d", INSN_UID (ready[i])); - - if (! m32r_is_insn (insn)) - fputs ("(?)", stream); - - else if (get_attr_insn_size (insn) != INSN_SIZE_SHORT) - fputs ("(l)", stream); - - else - fputs ("(s)", stream); - } - - fprintf (stream, "\n"); - } - } - return m32r_issue_rate (); -} - /* Indicate how many instructions can be issued at the same time. This is sort of a lie. The m32r can issue only 1 long insn at once, but it can issue 2 short insns. The default therefore is @@ -1626,45 +1487,6 @@ m32r_issue_rate (void) { return ((TARGET_LOW_ISSUE_RATE) ? 1 : 2); } - -/* If we have a machine that can issue a variable # of instructions - per cycle, indicate how many more instructions can be issued - after the current one. */ - -static int -m32r_variable_issue (FILE * stream, int verbose, rtx insn, int how_many) -{ - int orig_odd_word_p = m32r_sched_odd_word_p; - int short_p = FALSE; - - how_many--; - if (how_many > 0 && !TARGET_DEBUG) - { - if (! m32r_is_insn (insn)) - how_many++; - - else if (get_attr_insn_size (insn) != INSN_SIZE_SHORT) - { - how_many = 0; - m32r_sched_odd_word_p = 0; - } - else - { - m32r_sched_odd_word_p = !m32r_sched_odd_word_p; - short_p = TRUE; - } - } - - if (verbose > 7 && stream) - fprintf (stream, - ";;\t\t::: %s insn %d starts on an %s word, can emit %d more instruction(s)\n", - short_p ? "short" : "long", - INSN_UID (insn), - orig_odd_word_p ? "odd" : "even", - how_many); - - return how_many; -} /* Cost functions. 
*/ diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md index b969bb15815..c5994ef18ca 100644 --- a/gcc/config/m32r/m32r.md +++ b/gcc/config/m32r/m32r.md @@ -60,225 +60,142 @@ [(set_attr "length" "4") (set_attr "type" "multi")]) - -;; Whether an instruction is 16-bit or 32-bit +;; Whether an instruction is short (16-bit) or long (32-bit). (define_attr "insn_size" "short,long" (if_then_else (eq_attr "type" "int2,load2,store2,shift2,mul2") (const_string "short") (const_string "long"))) -(define_attr "debug" "no,yes" - (const (symbol_ref "(TARGET_DEBUG != 0)"))) - -(define_attr "opt_size" "no,yes" - (const (symbol_ref "(optimize_size != 0)"))) - -(define_attr "m32r" "no,yes" - (const (symbol_ref "(TARGET_M32R != 0)"))) - -(define_attr "m32rx" "no,yes" - (const (symbol_ref "(TARGET_M32RX != 0)"))) - -(define_attr "m32r2" "no,yes" - (const (symbol_ref "(TARGET_M32R2 != 0)"))) - -(define_attr "m32rx_pipeline" "either,s,o,long,m32r" - (cond [(and (eq_attr "m32rx" "no") - (eq_attr "m32r2" "no")) - (const_string "m32r") - +;; The target CPU we're compiling for. +(define_attr "cpu" "m32r,m32r2,m32rx" + (cond [(ne (symbol_ref "TARGET_M32RX") (const_int 0)) + (const_string "m32rx") + (ne (symbol_ref "TARGET_M32R2") (const_int 0)) + (const_string "m32r2")] + (const_string "m32r"))) + +;; Defines the pipeline on which an instruction can be executed. +;; For the M32R, a short instruction can execute on either of the two pipes. +;; For the M32Rx, the restrictions are modelled in the second +;; condition of this attribute definition. 
+(define_attr "m32r_pipeline" "either,s,o,long,m32r" + (cond [(and (eq_attr "cpu" "m32r") + (eq_attr "insn_size" "short")) + (const_string "either") (eq_attr "insn_size" "!short") - (const_string "long")] - - (cond [(eq_attr "type" "int2") - (const_string "either") - - (eq_attr "type" "load2,store2,shift2,uncond_branch,branch,call") - (const_string "o") - - (eq_attr "type" "mul2") - (const_string "s")] - - (const_string "long")))) + (const_string "long")] + (cond [(eq_attr "type" "int2") + (const_string "either") + (eq_attr "type" "load2,store2,shift2,uncond_branch,branch,call") + (const_string "o") + (eq_attr "type" "mul2") + (const_string "s")] + (const_string "long")))) ;; :::::::::::::::::::: ;; :: -;; :: Function Units +;; :: Pipeline description ;; :: ;; :::::::::::::::::::: -;; On most RISC machines, there are instructions whose results are not -;; available for a specific number of cycles. Common cases are instructions -;; that load data from memory. On many machines, a pipeline stall will result -;; if the data is referenced too soon after the load instruction. - -;; In addition, many newer microprocessors have multiple function units, -;; usually one for integer and one for floating point, and often will incur -;; pipeline stalls when a result that is needed is not yet ready. - -;; The descriptions in this section allow the specification of how much time -;; must elapse between the execution of an instruction and the time when its -;; result is used. It also allows specification of when the execution of an -;; instruction will delay execution of similar instructions due to function -;; unit conflicts. - -;; For the purposes of the specifications in this section, a machine is divided -;; into "function units", each of which execute a specific class of -;; instructions in first-in-first-out order. 
Function units that accept one -;; instruction each cycle and allow a result to be used in the succeeding -;; instruction (usually via forwarding) need not be specified. Classic RISC -;; microprocessors will normally have a single function unit, which we can call -;; `memory'. The newer "superscalar" processors will often have function units -;; for floating point operations, usually at least a floating point adder and -;; multiplier. - -;; Each usage of a function units by a class of insns is specified with a -;; `define_function_unit' expression, which looks like this: - -;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY -;; ISSUE-DELAY [CONFLICT-LIST]) - -;; NAME is a string giving the name of the function unit. - -;; MULTIPLICITY is an integer specifying the number of identical units in the -;; processor. If more than one unit is specified, they will be scheduled -;; independently. Only truly independent units should be counted; a pipelined -;; unit should be specified as a single unit. (The only common example of a -;; machine that has multiple function units for a single instruction class that -;; are truly independent and not pipelined are the two multiply and two -;; increment units of the CDC 6600.) - -;; SIMULTANEITY specifies the maximum number of insns that can be executing in -;; each instance of the function unit simultaneously or zero if the unit is -;; pipelined and has no limit. - -;; All `define_function_unit' definitions referring to function unit NAME must -;; have the same name and values for MULTIPLICITY and SIMULTANEITY. - -;; TEST is an attribute test that selects the insns we are describing in this -;; definition. Note that an insn may use more than one function unit and a -;; function unit may be specified in more than one `define_function_unit'. - -;; READY-DELAY is an integer that specifies the number of cycles after which -;; the result of the instruction can be used without introducing any stalls. 
- -;; ISSUE-DELAY is an integer that specifies the number of cycles after the -;; instruction matching the TEST expression begins using this unit until a -;; subsequent instruction can begin. A cost of N indicates an N-1 cycle delay. -;; A subsequent instruction may also be delayed if an earlier instruction has a -;; longer READY-DELAY value. This blocking effect is computed using the -;; SIMULTANEITY, READY-DELAY, ISSUE-DELAY, and CONFLICT-LIST terms. For a -;; normal non-pipelined function unit, SIMULTANEITY is one, the unit is taken -;; to block for the READY-DELAY cycles of the executing insn, and smaller -;; values of ISSUE-DELAY are ignored. - -;; CONFLICT-LIST is an optional list giving detailed conflict costs for this -;; unit. If specified, it is a list of condition test expressions to be -;; applied to insns chosen to execute in NAME following the particular insn -;; matching TEST that is already executing in NAME. For each insn in the list, -;; ISSUE-DELAY specifies the conflict cost; for insns not in the list, the cost -;; is zero. If not specified, CONFLICT-LIST defaults to all instructions that -;; use the function unit. - -;; Typical uses of this vector are where a floating point function unit can -;; pipeline either single- or double-precision operations, but not both, or -;; where a memory unit can pipeline loads, but not stores, etc. - -;; As an example, consider a classic RISC machine where the result of a load -;; instruction is not available for two cycles (a single "delay" instruction is -;; required) and where only one load instruction can be executed -;; simultaneously. 
This would be specified as: - -;; (define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0) - -;; For the case of a floating point function unit that can pipeline -;; either single or double precision, but not both, the following could be -;; specified: +;; This model is based on Chapter 2, Appendix 3 and Appendix 4 of the +;; "M32R-FPU Software Manual", Revision 1.01, plus additional information +;; obtained via our best friend, Google. +;; +;; The pipeline is modelled as a fetch unit, and a core with a memory unit and +;; two execution units, where "fetch" models IF and D, "memory" for MEM1 +;; and MEM2, and "EXEC" for E, E1, E2, EM, and EA. Writeback and +;; bypasses are not modelled. +(define_automaton "m32r") + +;; We pretend there are two short (16-bit) instruction fetchers. The +;; "s" short fetcher cannot be reserved until the "o" short fetcher is +;; reserved. Some instructions reserve both the left and right fetchers. +;; These fetch units are a hack to get GCC to better pack the instructions +;; for the M32Rx processor, which has two execution pipes. +;; +;; In reality there is only one decoder, which can decode either two 16-bit +;; instructions, or a single 32-bit instruction. ;; -;; (define_function_unit "fp" 1 0 -;; (eq_attr "type" "sp_fp") 4 4 -;; [(eq_attr "type" "dp_fp")]) +;; Note, "fetch" models both the IF and the D pipeline stages. ;; -;; (define_function_unit "fp" 1 0 -;; (eq_attr "type" "dp_fp") 4 4 -;; [(eq_attr "type" "sp_fp")]) - -;; Note: The scheduler attempts to avoid function unit conflicts and uses all -;; the specifications in the `define_function_unit' expression. It has -;; recently come to our attention that these specifications may not allow -;; modeling of some of the newer "superscalar" processors that have insns using -;; multiple pipelined units. These insns will cause a potential conflict for -;; the second unit used during their execution and there is no way of -;; representing that conflict. 
We welcome any examples of how function unit -;; conflicts work in such processors and suggestions for their representation. - -;; Function units of the M32R -;; Units that take one cycle do not need to be specified. - -;; (define_function_unit {name} {multiplicity} {simultaneity} {test} -;; {ready-delay} {issue-delay} [{conflict-list}]) - -;; Hack to get GCC to better pack the instructions. -;; We pretend there is a separate long function unit that conflicts with -;; both the left and right 16 bit insn slots. - -(define_function_unit "short" 2 2 - (and (eq_attr "m32r" "yes") +;; The m32rx core has two execution pipes. We name them o_E and s_E. +;; In addition, there's a memory unit. + +(define_cpu_unit "o_IF,s_IF,o_E,s_E,memory" "m32r") + +;; Prevent the s pipe from being reserved before the o pipe. +(final_presence_set "s_IF" "o_IF") +(final_presence_set "s_E" "o_E") + +;; On the M32Rx, long instructions execute on both pipes, so reserve +;; both fetch slots and both pipes. +(define_reservation "long_IF" "o_IF+s_IF") +(define_reservation "long_E" "o_E+s_E") + +;; :::::::::::::::::::: + +;; Simple instructions do 4 stages: IF D E WB. WB is not modelled. +;; Hence, ready latency is 1. 
+(define_insn_reservation "short_left" 1 + (and (eq_attr "m32r_pipeline" "o") + (and (eq_attr "insn_size" "short") + (eq_attr "type" "!load2"))) + "o_IF,o_E") + +(define_insn_reservation "short_right" 1 + (and (eq_attr "m32r_pipeline" "s") (and (eq_attr "insn_size" "short") (eq_attr "type" "!load2"))) - 1 0 - [(eq_attr "insn_size" "long")]) + "s_IF,s_E") -(define_function_unit "short" 2 2 ;; load delay of 1 clock for mem execution + 1 clock for WB - (and (eq_attr "m32r" "yes") - (eq_attr "type" "load2")) - 3 0 - [(eq_attr "insn_size" "long")]) +(define_insn_reservation "short_either" 1 + (and (eq_attr "m32r_pipeline" "either") + (and (eq_attr "insn_size" "short") + (eq_attr "type" "!load2"))) + "o_IF|s_IF,o_E|s_E") -(define_function_unit "long" 1 1 - (and (eq_attr "m32r" "yes") +(define_insn_reservation "long_m32r" 1 + (and (eq_attr "cpu" "m32r") (and (eq_attr "insn_size" "long") (eq_attr "type" "!load4,load8"))) - 1 0 - [(eq_attr "insn_size" "short")]) + "long_IF,long_E") -(define_function_unit "long" 1 1 ;; load delay of 1 clock for mem execution + 1 clock for WB - (and (eq_attr "m32r" "yes") - (and (eq_attr "insn_size" "long") - (eq_attr "type" "load4,load8"))) - 3 0 - [(eq_attr "insn_size" "short")]) - -(define_function_unit "left" 1 1 - (and (eq_attr "m32rx_pipeline" "o,either") - (eq_attr "type" "!load2")) - 1 0 - [(eq_attr "insn_size" "long")]) - -(define_function_unit "left" 1 1 ;; load delay of 1 clock for mem execution + 1 clock for WB - (and (eq_attr "m32rx_pipeline" "o,either") - (eq_attr "type" "load2")) - 3 0 - [(eq_attr "insn_size" "long")]) - -(define_function_unit "right" 1 1 - (eq_attr "m32rx_pipeline" "s,either") - 1 0 - [(eq_attr "insn_size" "long")]) - -(define_function_unit "long" 1 1 - (and (eq_attr "m32rx" "yes") +(define_insn_reservation "long_m32rx" 2 + (and (eq_attr "m32r_pipeline" "long") (and (eq_attr "insn_size" "long") (eq_attr "type" "!load4,load8"))) - 2 0 - [(eq_attr "insn_size" "short")]) + "long_IF,long_E") + +;; Load/store 
instructions do 6 stages: IF D E MEM1 MEM2 WB. +;; MEM1 may require more than one cycle depending on locality. We +;; optimistically assume all memory is nearby, i.e. MEM1 takes only +;; one cycle. Hence, ready latency is 3. + +;; The M32Rx can do short load/store only on the left pipe. +(define_insn_reservation "short_load_left" 3 + (and (eq_attr "m32r_pipeline" "o") + (and (eq_attr "insn_size" "short") + (eq_attr "type" "load2"))) + "o_IF,o_E,memory*2") -(define_function_unit "long" 1 1 ;; load delay of 1 clock for mem execution + 1 clock for WB - (and (eq_attr "m32rx" "yes") +(define_insn_reservation "short_load" 3 + (and (eq_attr "m32r_pipeline" "either") + (and (eq_attr "insn_size" "short") + (eq_attr "type" "load2"))) + "s_IF|o_IF,s_E|o_E,memory*2") + +(define_insn_reservation "long_load" 3 + (and (eq_attr "cpu" "m32r") (and (eq_attr "insn_size" "long") (eq_attr "type" "load4,load8"))) - 3 0 - [(eq_attr "insn_size" "short")]) + "long_IF,long_E,memory*2") + +(define_insn_reservation "long_load_m32rx" 3 + (and (eq_attr "m32r_pipeline" "long") + (eq_attr "type" "load4,load8")) + "long_IF,long_E,memory*2") + ;; Expand prologue as RTL (define_expand "prologue" diff --git a/gcc/target.h b/gcc/target.h index ad7936bbc2f..2c4b5300ec4 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -233,7 +233,7 @@ struct gcc_target rtx (* dfa_post_cycle_insn) (void); /* The following member value is a pointer to a function returning value which defines how many insns in queue `ready' will we try for - multi-pass scheduling. if the member value is nonzero and the + multi-pass scheduling. If the member value is nonzero and the function returns positive value, the DFA based scheduler will make multi-pass scheduling for the first cycle. In other words, we will try to choose ready insn which permits to start maximum number of