#include "target-def.h"
#include "langhooks.h"
+static int ia32_use_dfa_pipeline_interface PARAMS ((void));
+
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia32_use_dfa_pipeline_interface
+
+/* Implement TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE.  Return 1 when
+   ix86_cpu is PROCESSOR_PENTIUM and 0 for every other processor.  */
+
+static int
+ia32_use_dfa_pipeline_interface ()
+{
+ if (ix86_cpu == PROCESSOR_PENTIUM)
+ return 1;
+ return 0;
+}
+
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
-static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
-static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
- rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
-static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
return MEMORY_UNKNOWN;
}
-static enum attr_pent_pair
-ix86_safe_pent_pair (insn)
- rtx insn;
-{
- if (recog_memoized (insn) >= 0)
- return get_attr_pent_pair (insn);
- else
- return PENT_PAIR_NP;
-}
-
static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
rtx insn;
}
}
-/* Find an instruction with given pairability and minimal amount of cycles
- lost by the fact that the CPU waits for both pipelines to finish before
- reading next instructions. Also take care that both instructions together
- can not exceed 7 bytes. */
-
-static rtx *
-ix86_pent_find_pair (e_ready, ready, type, first)
- rtx *e_ready;
- rtx *ready;
- enum attr_pent_pair type;
- rtx first;
-{
- int mincycles, cycles;
- enum attr_pent_pair tmp;
- enum attr_memory memory;
- rtx *insnp, *bestinsnp = NULL;
-
- if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
- return NULL;
-
- memory = ix86_safe_memory (first);
- cycles = result_ready_cost (first);
- mincycles = INT_MAX;
-
- for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
- if ((tmp = ix86_safe_pent_pair (*insnp)) == type
- && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
- {
- enum attr_memory second_memory;
- int secondcycles, currentcycles;
-
- second_memory = ix86_safe_memory (*insnp);
- secondcycles = result_ready_cost (*insnp);
- currentcycles = abs (cycles - secondcycles);
-
- if (secondcycles >= 1 && cycles >= 1)
- {
- /* Two read/modify/write instructions together takes two
- cycles longer. */
- if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
- currentcycles += 2;
-
- /* Read modify/write instruction followed by read/modify
- takes one cycle longer. */
- if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
- && tmp != PENT_PAIR_UV
- && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
- currentcycles += 1;
- }
- if (currentcycles < mincycles)
- bestinsnp = insnp, mincycles = currentcycles;
- }
-
- return bestinsnp;
-}
-
-/* Subroutines of ix86_sched_reorder. */
-
-static void
-ix86_sched_reorder_pentium (ready, e_ready)
- rtx *ready;
- rtx *e_ready;
-{
- enum attr_pent_pair pair1, pair2;
- rtx *insnp;
-
- /* This wouldn't be necessary if Haifa knew that static insn ordering
- is important to which pipe an insn is issued to. So we have to make
- some minor rearrangements. */
-
- pair1 = ix86_safe_pent_pair (*e_ready);
-
- /* If the first insn is non-pairable, let it be. */
- if (pair1 == PENT_PAIR_NP)
- return;
-
- pair2 = PENT_PAIR_NP;
- insnp = 0;
-
- /* If the first insn is UV or PV pairable, search for a PU
- insn to go with. */
- if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
- {
- insnp = ix86_pent_find_pair (e_ready-1, ready,
- PENT_PAIR_PU, *e_ready);
- if (insnp)
- pair2 = PENT_PAIR_PU;
- }
-
- /* If the first insn is PU or UV pairable, search for a PV
- insn to go with. */
- if (pair2 == PENT_PAIR_NP
- && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
- {
- insnp = ix86_pent_find_pair (e_ready-1, ready,
- PENT_PAIR_PV, *e_ready);
- if (insnp)
- pair2 = PENT_PAIR_PV;
- }
-
- /* If the first insn is pairable, search for a UV
- insn to go with. */
- if (pair2 == PENT_PAIR_NP)
- {
- insnp = ix86_pent_find_pair (e_ready-1, ready,
- PENT_PAIR_UV, *e_ready);
- if (insnp)
- pair2 = PENT_PAIR_UV;
- }
-
- if (pair2 == PENT_PAIR_NP)
- return;
-
- /* Found something! Decide if we need to swap the order. */
- if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
- || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
- && ix86_safe_memory (*e_ready) == MEMORY_BOTH
- && ix86_safe_memory (*insnp) == MEMORY_LOAD))
- ix86_reorder_insn (insnp, e_ready);
- else
- ix86_reorder_insn (insnp, e_ready - 1);
-}
-
static void
ix86_sched_reorder_ppro (ready, e_ready)
rtx *ready;
default:
break;
- case PROCESSOR_PENTIUM:
- ix86_sched_reorder_pentium (ready, e_ready);
- break;
-
case PROCESSOR_PENTIUMPRO:
ix86_sched_reorder_ppro (ready, e_ready);
break;
]
(const_string "np")))
-;; Rough readiness numbers. Fine tuning happens in i386.c.
-;;
-;; u describes pipe U
-;; v describes pipe V
-;; uv describes either pipe U or V for those that can issue to either
-;; np describes not paring
-;; fpu describes fpu
-;; fpm describes fp insns of different types are not pipelined.
-;;
-;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
+(define_automaton "pentium,pentium_fpu")
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "imul"))
- 11 11)
-
-(define_function_unit "pent_mul" 1 1
+;; The Pentium has U and V pipes.  Instructions to both pipes
+;; are always issued together, much like on VLIW.
+;;
+;; predecode
+;; / \
+;; decodeu decodev
+;; / | |
+;; fpu executeu executev
+;; | | |
+;; fpu retire retire
+;; |
+;; fpu
+;; We add dummy "port" pipes, allocated only in the first cycle of
+;; an instruction, to specify this behaviour.
+
+(define_cpu_unit "pentium-portu,pentium-portv" "pentium")
+(define_cpu_unit "pentium-u,pentium-v" "pentium")
+(absence_set "pentium-portu" "pentium-u,pentium-v")
+(presence_set "pentium-portv" "pentium-portu")
+
+;; Floating point instructions can overlap with new issue of integer
+;; instructions. We model only first cycle of FP pipeline, as it is
+;; fully pipelined.
+(define_cpu_unit "pentium-fp" "pentium_fpu")
+
+;; There is non-pipelined multiplier unit used for complex operations.
+(define_cpu_unit "pentium-fmul" "pentium_fpu")
+
+;; Pentium preserves memory ordering, so when a load-execute-store
+;; instruction is executed together with another instruction loading
+;; data, the execution of the other instruction is delayed to the very
+;; last cycle of the first instruction, when data are bypassed.
+;; We model this by allocating the "memory" unit when a store is pending
+;; and using conflicting load units together.
+
+(define_cpu_unit "pentium-memory" "pentium")
+(define_cpu_unit "pentium-load0" "pentium")
+(define_cpu_unit "pentium-load1" "pentium")
+(absence_set "pentium-load0,pentium-load1" "pentium-memory")
+
+(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
+(define_reservation "pentium-np" "(pentium-u + pentium-v)")
+(define_reservation "pentium-uv" "(pentium-u | pentium-v)")
+(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
+(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
+(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
+(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
+(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
+(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
+(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
+ + pentium-memory)")
+;; V-pipe variant: the first cycle must reserve pentium-firstv, since
+;; pent_v_both continues in pentium-v on the following cycles.
+(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv
+ + pentium-memory)")
+(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
+ + pentium-memory)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+
+;; A few common long-latency instructions.
+(define_insn_reservation "pent_mul" 11
(and (eq_attr "cpu" "pentium")
(eq_attr "type" "imul"))
- 11 11)
+ "pentium-np*11")
-;; Rep movs takes minimally 12 cycles.
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_str" 12
(and (eq_attr "cpu" "pentium")
(eq_attr "type" "str"))
- 12 12)
+ "pentium-np*12")
-; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22
-(define_function_unit "pent_np" 1 0
+;; Integer division and some other long latency instructions block all
+;; units, including the FP pipe. There is no value in modeling the
+;; latency of these instructions and not modeling the latency
+;; decreases the size of the DFA.
+(define_insn_reservation "pent_block" 1
(and (eq_attr "cpu" "pentium")
(eq_attr "type" "idiv"))
- 46 46)
+ "pentium-np+pentium-fp")
-; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode,
-; 3 cycles for XFmode. Stores takes 2 cycles for SF/DF and 3 for XF.
-; fldz and fld1 takes 2 cycles. Only reg-reg moves are pairable.
-; The integer <-> fp conversion is not modeled correctly. Fild behaves
-; like normal fp operation and fist takes 6 cycles.
+(define_insn_reservation "pent_cld" 2
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "cld"))
+ "pentium-np*2")
-(define_function_unit "fpu" 1 0
+;; Moves usually have one cycle penalty, but there are exceptions.
+(define_insn_reservation "pent_fmov" 1
(and (eq_attr "cpu" "pentium")
(and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load,store")
- (eq_attr "mode" "XF"))))
- 3 3)
+ (eq_attr "memory" "none,load")))
+ "(pentium-fp+pentium-np)")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_fpmovxf" 3
(and (eq_attr "cpu" "pentium")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load,store")
(eq_attr "mode" "XF"))))
- 3 3)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (ior (match_operand 1 "immediate_operand" "")
- (eq_attr "memory" "store"))))
- 2 2)
+ "(pentium-fp+pentium-np)*3")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_fpstore" 2
(and (eq_attr "cpu" "pentium")
(and (eq_attr "type" "fmov")
- (ior (match_operand 1 "immediate_operand" "")
- (eq_attr "memory" "store"))))
- 2 2)
+ (ior (match_operand 1 "immediate_operand" "")
+ (eq_attr "memory" "store"))))
+ "(pentium-fp+pentium-np)*2")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_imov" 1
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "cld"))
- 2 2)
-
-(define_function_unit "fpu" 1 0
+ (eq_attr "type" "imov"))
+ "pentium-firstuv")
+
+;; Push and pop instructions have 1 cycle latency and special
+;; hardware bypass allows them to be paired with other push,pop
+;; and call instructions.
+(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
+(define_insn_reservation "pent_push" 1
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (eq_attr "memory" "none,load")))
- 1 1)
+ (and (eq_attr "type" "push")
+ (eq_attr "memory" "store")))
+ "pentium-firstuv")
-; Read/Modify/Write instructions usually take 3 cycles.
-(define_function_unit "pent_u" 1 0
+(define_insn_reservation "pent_pop" 1
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "pu")
- (eq_attr "memory" "both"))))
- 3 3)
+ (eq_attr "type" "pop"))
+ "pentium-firstuv")
-(define_function_unit "pent_uv" 2 0
+;; Call and branch instructions can execute in either pipe, but
+;; they are only pairable when in the V pipe.
+(define_insn_reservation "pent_call" 10
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "both"))))
- 3 3)
+ (eq_attr "type" "call,callv"))
+ "pentium-firstv,pentium-v*9")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_branch" 1
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,negnot,ishift")
- (and (eq_attr "pent_pair" "np")
- (eq_attr "memory" "both"))))
- 3 3)
-
-; Read/Modify or Modify/Write instructions usually take 2 cycles.
-(define_function_unit "pent_u" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,ishift")
- (and (eq_attr "pent_pair" "pu")
- (eq_attr "memory" "load,store"))))
- 2 2)
+ (eq_attr "type" "ibr"))
+ "pentium-firstv")
-(define_function_unit "pent_uv" 2 0
+;; Floating point instructions dispatch in the U pipe, but continue
+;; in the FP pipeline, allowing other instructions to be executed.
+(define_insn_reservation "pent_fp" 3
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "load,store"))))
- 2 2)
+ (eq_attr "type" "fop,fistp"))
+ "(pentium-firstu+pentium-fp),nothing,nothing")
-(define_function_unit "pent_np" 1 0
+;; First two cycles of fmul are not pipelined.
+(define_insn_reservation "pent_fmul" 3
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,ishift")
- (and (eq_attr "pent_pair" "np")
- (eq_attr "memory" "load,store"))))
- 2 2)
+ (eq_attr "type" "fmul"))
+ "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
-; Insns w/o memory operands and move instructions usually take one cycle.
-(define_function_unit "pent_u" 1 0
+;; Long latency FP instructions overlap with integer instructions,
+;; but only last 2 cycles with FP ones.
+(define_insn_reservation "pent_fdiv" 39
(and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "pu"))
- 1 1)
+ (eq_attr "type" "fdiv"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*36,pentium-fmul*2")
-(define_function_unit "pent_v" 1 0
+(define_insn_reservation "pent_fpspc" 70
(and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "pv"))
- 1 1)
+ (eq_attr "type" "fpspc"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*67,pentium-fmul*2")
-(define_function_unit "pent_uv" 2 0
+;; Integer instructions. Load/execute/store takes 3 cycles,
+;; load/execute 2 cycles and execute only one cycle.
+(define_insn_reservation "pent_uv_both" 3
(and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "!np"))
- 1 1)
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "both")))
+ "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_u_both" 3
(and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "np"))
- 1 1)
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "both")))
+ "pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
-; Pairable insns only conflict with other non-pairable insns.
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_v_both" 3
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "both"))))
- 3 3
- [(eq_attr "pent_pair" "np")])
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "both")))
+ "pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_np_both" 3
(and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "load,store"))))
- 2 2
- [(eq_attr "pent_pair" "np")])
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "both")))
+ "pentium-np,pentium-np,pentium-np")
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_uv_load" 2
(and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "!np"))
- 1 1
- [(eq_attr "pent_pair" "np")])
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "load")))
+ "pentium-firstuvload,pentium-uv")
-; Floating point instructions usually blocks cycle longer when combined with
-; integer instructions, because of the inpaired fxch instruction.
-(define_function_unit "pent_np" 1 0
+(define_insn_reservation "pent_u_load" 2
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fmov,fop,fsgn,fmul,fpspc,fcmov,fcmp,fistp"))
- 2 2
- [(eq_attr "type" "!fmov,fop,fsgn,fmul,fpspc,fcmov,fcmp,fistp")])
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "load")))
+ "pentium-firstuload,pentium-u")
-(define_function_unit "fpu" 1 0
+(define_insn_reservation "pent_v_load" 2
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fcmp,fxch,fsgn"))
- 1 1)
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "load")))
+ "pentium-firstvload,pentium-v")
-; Addition takes 3 cycles; assume other random cruft does as well.
-; ??? Trivial fp operations such as fabs or fchs takes only one cycle.
-(define_function_unit "fpu" 1 0
+(define_insn_reservation "pent_np_load" 2
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fop,fistp"))
- 3 1)
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "load")))
+ "pentium-np,pentium-np")
-; Multiplication takes 3 cycles and is only half pipelined.
-(define_function_unit "fpu" 1 0
+(define_insn_reservation "pent_uv" 1
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fmul"))
- 3 1)
-
-(define_function_unit "pent_mul" 1 1
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fmul"))
- 2 2)
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "none")))
+ "pentium-firstuv")
-; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles.
-; They can overlap with integer insns. Only the last two cycles can overlap
-; with other fp insns. Only fsin/fcos can overlap with multiplies.
-; Only last two cycles of fsin/fcos can overlap with other instructions.
-(define_function_unit "fpu" 1 0
+(define_insn_reservation "pent_u" 1
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fdiv"))
- 39 37)
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "none")))
+ "pentium-firstu")
-(define_function_unit "pent_mul" 1 1
+(define_insn_reservation "pent_v" 1
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fdiv"))
- 39 39)
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "none")))
+ "pentium-firstv")
-(define_function_unit "fpu" 1 0
+(define_insn_reservation "pent_np" 1
(and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fpspc"))
- 70 68)
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "none")))
+ "pentium-np")
-(define_function_unit "pent_mul" 1 1
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fpspc"))
- 70 70)
\f
;; Pentium Pro/PII Scheduling
;;