From 6ad48e84df035fd325684bf59baf86c67ae1bdb1 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 15 Aug 2001 14:28:46 +0200 Subject: [PATCH] predict.c (struct block_info_def): Remove nvisited. * predict.c (struct block_info_def): Remove nvisited. (propagate_freq): Use EDGE_DFS_BACK to detect irreducible regions. (estimate_bb_frequencies): Call mark_dfs_back_edges. * i386.c (ix86_agi_dependant): Lea causes AGI only on the Pentium. (ix86_adjust_cost): Teach scheduler that latency to load operand can be masked. From-SVN: r44921 --- gcc/ChangeLog | 10 ++++++ gcc/config/i386/i386.c | 72 ++++++++++++++++++++++++++++++++++++------ gcc/predict.def | 12 +++---- 3 files changed, 79 insertions(+), 15 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f0784020ae..7ace5598397 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +Wed Aug 15 14:24:22 CEST 2001 Jan Hubicka + + * predict.c (struct block_info_def): Remove nvisited. + (propagate_freq): Use EDGE_DFS_BACK to detect irreducible regions. + (estimate_bb_frequencies): Call mark_dfs_back_edges. + + * i386.c (ix86_agi_dependant): Lea causes AGI only on the Pentium. + (ix86_adjust_cost): Teach scheduler that latency to load operand can + be masked. + Wed Aug 15 12:41:32 CEST 2001 Jan Hubicka * predict.def: Update hitrates. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b182de41f80..ed62fab713f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -8450,7 +8450,8 @@ ix86_agi_dependant (insn, dep_insn, insn_type) { rtx addr; - if (insn_type == TYPE_LEA) + if (insn_type == TYPE_LEA + && TARGET_PENTIUM) { addr = PATTERN (insn); if (GET_CODE (addr) == SET) @@ -8485,7 +8486,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost) int cost; { enum attr_type insn_type, dep_insn_type; - enum attr_memory memory; + enum attr_memory memory, dep_memory; rtx set, set2; int dep_insn_code_number; @@ -8521,12 +8522,14 @@ ix86_adjust_cost (insn, link, dep_insn, cost) break; case PROCESSOR_PENTIUMPRO: + memory = get_attr_memory (insn); + dep_memory = get_attr_memory (dep_insn); + /* Since we can't represent delayed latencies of load+operation, increase the cost here for non-imov insns. */ if (dep_insn_type != TYPE_IMOV - && dep_insn_type != TYPE_FMOV - && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD) - || memory == MEMORY_BOTH)) + && dep_insn_type != TYPE_FMOV + && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)) cost += 1; /* INT->FP conversion is expensive. */ @@ -8540,9 +8543,26 @@ ix86_adjust_cost (insn, link, dep_insn, cost) && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) && GET_CODE (SET_DEST (set2)) == MEM) cost += 1; + + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. */ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependant (insn, dep_insn, insn_type)) + { + /* Claim moves to take one cycle, as core can issue one load + at time and the next load can start cycle later. 
*/ + if (dep_insn_type == TYPE_IMOV + || dep_insn_type == TYPE_FMOV) + cost = 1; + else if (cost > 1) + cost--; + } break; case PROCESSOR_K6: + memory = get_attr_memory (insn); + dep_memory = get_attr_memory (dep_insn); /* The esp dependency is resolved before the instruction is really finished. */ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) @@ -8551,24 +8571,58 @@ ix86_adjust_cost (insn, link, dep_insn, cost) /* Since we can't represent delayed latencies of load+operation, increase the cost here for non-imov insns. */ - if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD) - || memory == MEMORY_BOTH) + if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1; /* INT->FP conversion is expensive. */ if (get_attr_fp_int_src (dep_insn)) cost += 5; + + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. */ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependant (insn, dep_insn, insn_type)) + { + /* Claim moves to take one cycle, as core can issue one load + at time and the next load can start cycle later. */ + if (dep_insn_type == TYPE_IMOV + || dep_insn_type == TYPE_FMOV) + cost = 1; + else if (cost > 2) + cost -= 2; + else + cost = 1; + } break; case PROCESSOR_ATHLON: - if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD - || memory == MEMORY_BOTH) + memory = get_attr_memory (insn); + dep_memory = get_attr_memory (dep_insn); + + if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) { if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV) cost += 2; else cost += 3; } + /* Show ability of reorder buffer to hide latency of load by executing + in parallel with previous instruction in case + previous instruction is not needed to compute the address. 
*/ + if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) + && !ix86_agi_dependant (insn, dep_insn, insn_type)) + { + /* Claim moves to take one cycle, as core can issue one load + at time and the next load can start cycle later. */ + if (dep_insn_type == TYPE_IMOV + || dep_insn_type == TYPE_FMOV) + cost = 0; + else if (cost >= 3) + cost -= 3; + else + cost = 0; + } default: break; diff --git a/gcc/predict.def b/gcc/predict.def index a4f7afb05d5..6496ef02d87 100644 --- a/gcc/predict.def +++ b/gcc/predict.def @@ -68,11 +68,11 @@ DEF_PREDICTOR (PRED_NORETURN, "noreturn call", PROB_ALWAYS, PRED_FLAG_FIRST_MATCH) /* Loopback edge is taken. */ -DEF_PREDICTOR (PRED_LOOP_BRANCH, "loop branch", HITRATE (88), +DEF_PREDICTOR (PRED_LOOP_BRANCH, "loop branch", HITRATE (89), PRED_FLAG_FIRST_MATCH) /* Edge causing loop to terminate is probably not taken. */ -DEF_PREDICTOR (PRED_LOOP_EXIT, "loop exit", HITRATE (92), +DEF_PREDICTOR (PRED_LOOP_EXIT, "loop exit", HITRATE (90), PRED_FLAG_FIRST_MATCH) /* Condition emitted by preconditiong code to ensure that variable @@ -83,16 +83,16 @@ DEF_PREDICTOR (PRED_LOOP_CONDITION, "loop condition", PROB_VERY_LIKELY, 0) DEF_PREDICTOR (PRED_LOOP_PRECONDITIONING, "loop preconditioning", PROB_VERY_LIKELY, 0) /* Copied condition for the first iteration of loop is probably true. */ -DEF_PREDICTOR (PRED_LOOP_HEADER, "loop header", HITRATE (60), 0) +DEF_PREDICTOR (PRED_LOOP_HEADER, "loop header", HITRATE (64), 0) /* Pointers are usually not NULL. */ -DEF_PREDICTOR (PRED_POINTER, "pointer", HITRATE (75), 0) +DEF_PREDICTOR (PRED_POINTER, "pointer", HITRATE (83), 0) /* NE is probable, EQ not etc... */ -DEF_PREDICTOR (PRED_OPCODE, "opcode", HITRATE (53), 0) +DEF_PREDICTOR (PRED_OPCODE, "opcode", HITRATE (55), 0) /* Branch guarding call is probably taken. */ -DEF_PREDICTOR (PRED_CALL, "call", HITRATE (66), 0) +DEF_PREDICTOR (PRED_CALL, "call", HITRATE (70), 0) /* Branch causing function to terminate is probably not taken. 
*/ DEF_PREDICTOR (PRED_ERROR_RETURN, "error return", PROB_LIKELY, 0) -- 2.30.2