From: David S. Miller
Date: Sun, 16 Aug 1998 18:50:32 +0000 (+0000)
Subject: sparc.c (ultra_code_from_mask, [...]): New functions to describe UltraSPARC pipeline...
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3bc8b61efc54c66c0cf4ecc4dfa3b387bfa48108;p=gcc.git

sparc.c (ultra_code_from_mask, [...]): New functions to describe UltraSPARC
pipeline exactly to Haifa.

        * config/sparc/sparc.c (ultra_code_from_mask,
        ultra_cmove_results_ready_p, ultra_fpmode_conflict_exists,
        ultra_find_type, ultra_schedule_insn, ultra_flush_pipeline,
        ultrasparc_sched_init, ultrasparc_variable_issue,
        ultra_rescan_pipeline_state, ultrasparc_sched_reorder): New
        functions to describe UltraSPARC pipeline exactly to Haifa.
        (ultrasparc_adjust_cost): Indicate IMUL type insns have zero cost,
        as there is nothing the scheduler can do about it.  Indicate that
        REG_DEP_OUTPUT's collide.  Fixup formatting.
        * config/sparc/sparc.h (RTX_COSTS): Fixup integer multiply and
        divide costs on Ultra for DImode.
        (MD_SCHED_INIT, MD_SCHED_REORDER, MD_SCHED_VARIABLE_ISSUE):
        Define.
        * config/sparc/sparc.md (ieu_unnamed function unit): Rename to
        ieuN and add call_no_delay_slot to type list.
        (cti function unit): New unit for branches on UltraSPARC.
        (subx/addx insns): Set type to misc.
        (sidi zero/sign extension insns on arch64): Set type to shift.
        (sign_extendhidi2_insn): Set type to sload.

From-SVN: r21767
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8f7fbfeb1e6..68164b61f07 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+Sun Aug 16 17:37:06 1998  David S. Miller
+
+        * config/sparc/sparc.c (ultra_code_from_mask,
+        ultra_cmove_results_ready_p, ultra_fpmode_conflict_exists,
+        ultra_find_type, ultra_schedule_insn, ultra_flush_pipeline,
+        ultrasparc_sched_init, ultrasparc_variable_issue,
+        ultra_rescan_pipeline_state, ultrasparc_sched_reorder): New
+        functions to describe UltraSPARC pipeline exactly to Haifa.
+        (ultrasparc_adjust_cost): Indicate IMUL type insns have zero cost,
+        as there is nothing the scheduler can do about it.  Indicate that
+        REG_DEP_OUTPUT's collide.  Fixup formatting.
+        * config/sparc/sparc.h (RTX_COSTS): Fixup integer multiply and
+        divide costs on Ultra for DImode.
+        (MD_SCHED_INIT, MD_SCHED_REORDER, MD_SCHED_VARIABLE_ISSUE):
+        Define.
+        * config/sparc/sparc.md (ieu_unnamed function unit): Rename to
+        ieuN and add call_no_delay_slot to type list.
+        (cti function unit): New unit for branches on UltraSPARC.
+        (subx/addx insns): Set type to misc.
+        (sidi zero/sign extension insns on arch64): Set type to shift.
+        (sign_extendhidi2_insn): Set type to sload.
+
 Sun Aug 16 13:52:00 1998  David Edelsohn
 
         * rs6000.c (rs6000_stack_info): Use if == 0 for sizes.
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 56cc02a7f76..0dbd10c3463 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -6102,8 +6102,8 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
      cycles later.  */
 
   /* if a load, then the dependence must be on the memory address;
-     add an extra 'cycle'.  Note that the cost could be two cycles
-     if the reg was written late in an instruction group; we can't tell
+     add an extra "cycle".  Note that the cost could be two cycles
+     if the reg was written late in an instruction group; we cannot tell
      here.  */
   if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
     return cost + 3;
 
@@ -6115,7 +6115,7 @@
       rtx dep_pat = PATTERN (dep_insn);
 
       if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
-        return cost;  /* This shouldn't happen!  */
+        return cost;  /* This should not happen!  */
 
       /* The dependency between the two instructions was on the data that
          is being stored.  Assume that this implies that the address of the
@@ -6147,67 +6147,765 @@ supersparc_adjust_cost (insn, link, dep_insn, cost)
   return cost;
 }
 
+/* This describes the state of the UltraSPARC pipeline during
+   instruction scheduling.  */
+
+#define TMASK(__x) (1U << ((int)(__x)))
+#define UMASK(__x) (1U << ((int)(__x)))
+
+enum ultra_code { NONE=0,   /* no insn at all */
+                  IEU0,     /* shifts and conditional moves */
+                  IEU1,     /* condition code setting insns, calls+jumps */
+                  IEUN,     /* all other single cycle ieu insns */
+                  LSU,      /* loads and stores */
+                  CTI,      /* branches */
+                  FPM,      /* FPU pipeline 1, multiplies and divides */
+                  FPA,      /* FPU pipeline 2, all other operations */
+                  SINGLE,   /* single issue instructions */
+                  NUM_ULTRA_CODES };
+
+static char *ultra_code_names[NUM_ULTRA_CODES] = {
+  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
+  "FPM", "FPA", "SINGLE" };
+
+struct ultrasparc_pipeline_state {
+  /* The insns in this group.  */
+  rtx group[4];
+
+  /* The code for each insn.  */
+  enum ultra_code codes[4];
+
+  /* Which insns in this group have been committed by the
+     scheduler.  This is how we determine how many more
+     can issue this cycle.  */
+  char commit[4];
+
+  /* How many insns in this group.  */
+  char group_size;
+
+  /* Mask of free slots still in this group.  */
+  char free_slot_mask;
+
+  /* The slotter uses the following to determine what other
+     insn types can still make their way into this group.  */
+  char contents [NUM_ULTRA_CODES];
+  char num_ieu_insns;
+};
+
+#define ULTRA_NUM_HIST 8
+static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
+static int ultra_cur_hist;
+static int ultra_cycles_elapsed;
+
+#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
+
+/* Given TYPE_MASK compute the ultra_code it maps to.  */
+static enum ultra_code
+ultra_code_from_mask (type_mask)
+     int type_mask;
+{
+  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
+    return IEU0;
+  else if (type_mask & (TMASK (TYPE_COMPARE) |
+                        TMASK (TYPE_CALL) |
+                        TMASK (TYPE_UNCOND_BRANCH)))
+    return IEU1;
+  else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
+                        TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY)))
+    return IEUN;
+  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
+                        TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
+                        TMASK (TYPE_FPSTORE)))
+    return LSU;
+  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
+                        TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
+    return FPM;
+  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
+                        TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
+    return FPA;
+  else if (type_mask & TMASK (TYPE_BRANCH))
+    return CTI;
+
+  return SINGLE;
+}
+
+/* Check INSN (a conditional move) and make sure that its
+   results are available at this cycle.  Return 1 if the
+   results are in fact ready.  */
+static int
+ultra_cmove_results_ready_p (insn)
+     rtx insn;
+{
+  struct ultrasparc_pipeline_state *up;
+  int entry, slot;
+
+  /* If this got dispatched in the previous
+     group, the results are not ready.  */
+  entry = (ultra_cur_hist + ULTRA_NUM_HIST - 1) % ULTRA_NUM_HIST;
+  up = &ultra_pipe_hist[entry];
+  slot = 4;
+  while (--slot >= 0)
+    if (up->group[slot] == insn)
+      return 0;
+
+  return 1;
+}
+
+/* Walk backwards in pipeline history looking for FPU
+   operations which use a mode different from FPMODE and
+   will create a stall if an insn using FPMODE were to be
+   dispatched this cycle.  */
+static int
+ultra_fpmode_conflict_exists (fpmode)
+     enum machine_mode fpmode;
+{
+  int hist_ent;
+  int hist_lim;
+
+  hist_ent = (ultra_cur_hist + ULTRA_NUM_HIST - 1) % ULTRA_NUM_HIST;
+  if (ultra_cycles_elapsed < 4)
+    hist_lim = ultra_cycles_elapsed;
+  else
+    hist_lim = 4;
+  while (hist_lim > 0)
+    {
+      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
+      int slot = 4;
+
+      while (--slot >= 0)
+        {
+          rtx insn = up->group[slot];
+          enum machine_mode this_mode;
+          rtx pat;
+
+          if (! insn
+              || GET_CODE (insn) != INSN
+              || (pat = PATTERN (insn)) == 0
+              || GET_CODE (pat) != SET)
+            continue;
+
+          this_mode = GET_MODE (SET_DEST (pat));
+          if ((this_mode != SFmode
+               && this_mode != DFmode)
+              || this_mode == fpmode)
+            continue;
+
+          /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
+             we will get a stall.  */
+          if (GET_CODE (SET_SRC (pat)) != ABS
+              && GET_CODE (SET_SRC (pat)) != NEG
+              && ((TMASK (get_attr_type (insn)) &
+                   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
+                    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT))) == 0))
+            return 1;
+        }
+      hist_lim--;
+      hist_ent = (hist_ent + ULTRA_NUM_HIST - 1) % ULTRA_NUM_HIST;
+    }
+
+  /* No conflicts, safe to dispatch.  */
+  return 0;
+}
+
+/* Find an instruction in LIST which has one of the
+   type attributes enumerated in TYPE_MASK.  START
+   says where to begin the search.
+
+   NOTE: This scheme depends upon the fact that we
+   have fewer than 32 distinct type attributes.  */
+static rtx *
+ultra_find_type (type_mask, list, start)
+     int type_mask;
+     rtx *list;
+     int start;
+{
+  int i;
+
+  for (i = start; i >= 0; i--)
+    {
+      rtx insn = list[i];
+
+      if (recog_memoized (insn) >= 0
+          && (TMASK (get_attr_type (insn)) & type_mask))
+        {
+          enum machine_mode fpmode = VOIDmode;
+          rtx pat = 0;
+          int slot;
+          int check_depend = 0;
+          int check_fpmode_conflict = 0;
+
+          if (GET_CODE (insn) == INSN
+              && (pat = PATTERN (insn)) != 0
+              && GET_CODE (pat) == SET
+              && !(type_mask & (TMASK (TYPE_STORE) |
+                                TMASK (TYPE_FPSTORE))))
+            {
+              check_depend = 1;
+              if (GET_MODE (SET_DEST (pat)) == SFmode
+                  || GET_MODE (SET_DEST (pat)) == DFmode)
+                {
+                  fpmode = GET_MODE (SET_DEST (pat));
+                  check_fpmode_conflict = 1;
+                }
+            }
+
+          slot = 4;
+          while (--slot >= 0)
+            {
+              rtx slot_insn = ultra_pipe.group[slot];
+              rtx slot_pat;
+
+              /* Already issued, bad dependency, or FPU
+                 mode conflict.  */
+              if (slot_insn != 0
+                  && (slot_pat = PATTERN (slot_insn)) != 0
+                  && ((insn == slot_insn)
+                      || (check_depend == 1
+                          && GET_CODE (slot_insn) == INSN
+                          && GET_CODE (slot_pat) == SET
+                          && rtx_equal_p (SET_DEST (slot_pat),
+                                          SET_SRC (pat)))
+                      || (check_fpmode_conflict == 1
+                          && GET_CODE (slot_insn) == INSN
+                          && GET_CODE (slot_pat) == SET
+                          && ((GET_MODE (SET_DEST (slot_pat)) == SFmode
+                               || GET_MODE (SET_DEST (slot_pat)) == DFmode)
+                              && GET_MODE (SET_DEST (slot_pat)) != fpmode))))
+                goto next;
+            }
+
+          /* Check for peculiar result availability and dispatch
+             interference situations.  */
+          if (pat != 0
+              && ultra_cycles_elapsed > 0)
+            {
+              rtx link;
+
+              for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+                {
+                  rtx link_insn = XEXP (link, 0);
+                  if (GET_CODE (link_insn) == INSN
+                      && recog_memoized (link_insn) >= 0
+                      && (TMASK (get_attr_type (link_insn)) &
+                          (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
+                      && ! ultra_cmove_results_ready_p (link_insn))
+                    goto next;
+                }
+
+              if (check_fpmode_conflict
+                  && ultra_fpmode_conflict_exists (fpmode))
+                goto next;
+            }
+
+          return &list[i];
+        }
+    next:
+      ;
+    }
+  return 0;
+}
+
+/* Place the insn pointed to by IP into the pipeline.
+   Make element THIS of READY be that insn if it
+   is not already.  TYPE indicates the pipeline class
+   this insn falls into.  */
+static void
+ultra_schedule_insn (ip, ready, this, type)
+     rtx *ip;
+     rtx *ready;
+     int this;
+     enum ultra_code type;
+{
+  int pipe_slot;
+  char mask = ultra_pipe.free_slot_mask;
+
+  /* Obtain free slot.  */
+  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
+    if ((mask & (1 << pipe_slot)) != 0)
+      break;
+  if (pipe_slot == 4)
+    abort ();
+
+  /* In it goes, and it hasn't been committed yet.  */
+  ultra_pipe.group[pipe_slot] = *ip;
+  ultra_pipe.codes[pipe_slot] = type;
+  ultra_pipe.contents[type] = 1;
+  if (UMASK (type) &
+      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
+    ultra_pipe.num_ieu_insns += 1;
+
+  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
+  ultra_pipe.group_size += 1;
+  ultra_pipe.commit[pipe_slot] = 0;
+
+  /* Update ready list.  */
+  if (ip != &ready[this])
+    {
+      rtx temp = *ip;
+
+      *ip = ready[this];
+      ready[this] = temp;
+    }
+}
+
+/* Advance to the next pipeline group.  */
+static void
+ultra_flush_pipeline ()
+{
+  ultra_cur_hist = (ultra_cur_hist + 1) % ULTRA_NUM_HIST;
+  ultra_cycles_elapsed += 1;
+  bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+  ultra_pipe.free_slot_mask = 0xf;
+}
+
+static int ultra_reorder_called_this_block;
+
+/* Init our data structures for the current block.  */
+void
+ultrasparc_sched_init (dump, sched_verbose)
+     FILE *dump;
+     int sched_verbose;
+{
+  bzero ((char *) &ultra_pipe_hist, sizeof ultra_pipe_hist);
+  ultra_pipe.free_slot_mask = 0xf;
+  ultra_cur_hist = 0;
+  ultra_cycles_elapsed = 0;
+  ultra_reorder_called_this_block = 0;
+}
+
+/* INSN has been scheduled, update pipeline commit state
+   and return how many instructions are still to be
+   scheduled in this group.  */
 int
-ultrasparc_adjust_cost (insn, link, dep_insn, cost)
-     rtx insn;
-     rtx link;
-     rtx dep_insn;
-     int cost;
+ultrasparc_variable_issue (insn)
+     rtx insn;
+{
+  struct ultrasparc_pipeline_state *up = &ultra_pipe;
+  int i, left_to_fire;
+
+  left_to_fire = 0;
+  for (i = 0; i < 4; i++)
+    {
+      if (up->group[i] == 0)
+        continue;
+
+      if (up->group[i] == insn)
+        {
+          up->commit[i] = 1;
+        }
+      else if (! up->commit[i])
+        left_to_fire++;
+    }
+
+  return left_to_fire;
+}
+
+/* In actual_hazard_this_instance, we may have yanked some
+   instructions from the ready list due to conflict cost
+   adjustments.  If so, and such an insn was in our pipeline
+   group, remove it and update state.  */
+static void
+ultra_rescan_pipeline_state (ready, n_ready)
+     rtx *ready;
+     int n_ready;
+{
+  struct ultrasparc_pipeline_state *up = &ultra_pipe;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      rtx insn = up->group[i];
+      int j;
+
+      if (! insn)
+        continue;
+
+      /* If it has been committed, then it was removed from
+         the ready list because it was actually scheduled,
+         and that is not the case we are searching for here.  */
+      if (up->commit[i] != 0)
+        continue;
+
+      for (j = n_ready - 1; j >= 0; j--)
+        if (ready[j] == insn)
+          break;
+
+      /* If we didn't find it, toss it.  */
+      if (j < 0)
+        {
+          enum ultra_code ucode = up->codes[i];
+
+          up->group[i] = 0;
+          up->codes[i] = NONE;
+          up->contents[ucode] = 0;
+          if (UMASK (ucode) &
+              (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
+            up->num_ieu_insns -= 1;
+
+          up->free_slot_mask |= (1 << i);
+          up->group_size -= 1;
+          up->commit[i] = 0;
+        }
+    }
+}
+
+void
+ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
+     FILE *dump;
+     int sched_verbose;
+     rtx *ready;
+     int n_ready;
+{
+  struct ultrasparc_pipeline_state *up = &ultra_pipe;
+  int i, this_insn;
+
+  /* We get called once unnecessarily per block of insns
+     scheduled.  */
+  if (ultra_reorder_called_this_block == 0)
+    {
+      ultra_reorder_called_this_block = 1;
+      return;
+    }
+
+  if (sched_verbose)
+    {
+      int n;
+
+      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
+      for (n = n_ready - 1; n >= 0; n--)
+        {
+          rtx insn = ready[n];
+          enum ultra_code ucode;
+
+          if (recog_memoized (insn) < 0)
+            continue;
+          ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
+          if (n != 0)
+            fprintf (dump, "%s(%d) ",
+                     ultra_code_names[ucode],
+                     INSN_UID (insn));
+          else
+            fprintf (dump, "%s(%d)",
+                     ultra_code_names[ucode],
+                     INSN_UID (insn));
+        }
+      fprintf (dump, "]\n");
+    }
+
+  this_insn = n_ready - 1;
+
+  /* Skip over junk we don't understand.  */
+  while ((this_insn >= 0)
+         && recog_memoized (ready[this_insn]) < 0)
+    this_insn--;
+
+  while (this_insn >= 0) {
+    int old_group_size = up->group_size;
+
+    if (up->group_size != 0)
+      {
+        int num_committed;
+
+        num_committed = (up->commit[0] + up->commit[1] +
+                         up->commit[2] + up->commit[3]);
+        /* If nothing has been committed from our group, or all of
+           them have been, clear out the (current cycle's) pipeline
+           state and start afresh.  */
+        if (num_committed == 0
+            || num_committed == up->group_size)
+          {
+            bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+            ultra_pipe.free_slot_mask = 0xf;
+            old_group_size = 0;
+          }
+        else
+          {
+            /* OK, some ready list insns got requeued and thus removed
+               from the ready list.  Account for this fact.  */
+            ultra_rescan_pipeline_state (ready, n_ready);
+
+            /* Something "changed", make this look like a newly
+               formed group so the code at the end of the loop
+               knows that progress was in fact made.  */
+            if (up->group_size != old_group_size)
+              old_group_size = 0;
+          }
+      }
+
+    if (up->group_size == 0)
+      {
+        /* If the pipeline is (still) empty and we have any single
+           group insns, get them out now as this is a good time.  */
+        rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_ADDRESS) |
+                                    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
+                                    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
+                                   ready, this_insn);
+        if (ip)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, SINGLE);
+            break;
+          }
+
+        /* If we are not in the process of emptying out the pipe, try to
+           obtain an instruction which must be the first in its group.  */
+        ip = ultra_find_type ((TMASK (TYPE_CALL) |
+                               TMASK (TYPE_CALL_NO_DELAY_SLOT) |
+                               TMASK (TYPE_UNCOND_BRANCH)),
+                              ready, this_insn);
+        if (ip)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, IEU1);
+            this_insn--;
+          }
+        else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
+                                         TMASK (TYPE_FPDIVD) |
+                                         TMASK (TYPE_FPSQRT)),
+                                        ready, this_insn)) != 0)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, FPM);
+            this_insn--;
+          }
+      }
+
+    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
+       operation.  We can't do more IEU operations if the first 3 slots are
+       all full or we have dispatched two IEU insns already.  */
+    if ((up->free_slot_mask & 0x7) != 0
+        && up->num_ieu_insns < 2
+        && up->contents[IEU0] == 0
+        && up->contents[IEUN] == 0)
+      {
+        rtx *ip = ultra_find_type (TMASK (TYPE_SHIFT), ready, this_insn);
+        if (ip)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, IEU0);
+            this_insn--;
+          }
+      }
+
+    /* If we can, try to find an IEU1 specific or an unnamed
+       IEU instruction.  */
+    if ((up->free_slot_mask & 0x7) != 0
+        && up->num_ieu_insns < 2)
+      {
+        rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
+                                    TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY) |
+                                    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
+                                   ready, this_insn);
+        if (ip)
+          {
+            rtx insn = *ip;
+
+            ultra_schedule_insn (ip, ready, this_insn,
+                                 (!up->contents[IEU1]
+                                  && get_attr_type (insn) == TYPE_COMPARE)
+                                 ? IEU1 : IEUN);
+            this_insn--;
+          }
+      }
+
+    /* If only one IEU insn has been found, try to find another unnamed
+       IEU operation or an IEU1 specific one.  */
+    if ((up->free_slot_mask & 0x7) != 0
+        && up->num_ieu_insns < 2)
+      {
+        rtx *ip;
+        int tmask = (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
+                     TMASK (TYPE_MOVE) | TMASK (TYPE_UNARY));
+
+        if (!up->contents[IEU1])
+          tmask |= TMASK (TYPE_COMPARE);
+        ip = ultra_find_type (tmask, ready, this_insn);
+        if (ip)
+          {
+            rtx insn = *ip;
+
+            ultra_schedule_insn (ip, ready, this_insn,
+                                 (!up->contents[IEU1]
+                                  && get_attr_type (insn) == TYPE_COMPARE)
+                                 ? IEU1 : IEUN);
+            this_insn--;
+          }
+      }
+
+    /* Try for a load or store, but such an insn can only be issued
+       if it is within one of the first 3 slots.  */
+    if ((up->free_slot_mask & 0x7) != 0
+        && up->contents[LSU] == 0)
+      {
+        rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
+                                    TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
+                                    TMASK (TYPE_FPSTORE)), ready, this_insn);
+        if (ip)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, LSU);
+            this_insn--;
+          }
+      }
+
+    /* Now find FPU operations, first FPM class.  But not divisions or
+       square-roots because those will break the group up.  Unlike all
+       the previous types, these can go in any slot.  */
+    if (up->free_slot_mask != 0
+        && up->contents[FPM] == 0)
+      {
+        rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
+        if (ip)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, FPM);
+            this_insn--;
+          }
+      }
+
+    /* Continue on with FPA class if we have not filled the group already.  */
+    if (up->free_slot_mask != 0
+        && up->contents[FPA] == 0)
+      {
+        rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
+                                    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
+                                   ready, this_insn);
+        if (ip)
+          {
+            ultra_schedule_insn (ip, ready, this_insn, FPA);
+            this_insn--;
+          }
+      }
+
+    /* Finally, maybe stick a branch in here.  */
+    if (up->free_slot_mask != 0
+        && up->contents[CTI] == 0)
+      {
+        rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
+
+        /* Try to slip in a branch only if it is one of the
+           next 2 in the ready list.  */
+        if (ip && ((&ready[this_insn] - ip) < 2))
+          {
+            ultra_schedule_insn (ip, ready, this_insn, CTI);
+            this_insn--;
+          }
+      }
+
+    up->group_size = 0;
+    for (i = 0; i < 4; i++)
+      if ((up->free_slot_mask & (1 << i)) == 0)
+        up->group_size++;
+
+    /* See if we made any progress...  */
+    if (old_group_size != up->group_size)
+      break;
+
+    /* Clean out the (current cycle's) pipeline state
+       and try once more.  */
+    bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+    ultra_pipe.free_slot_mask = 0xf;
+  }
+
+  if (sched_verbose)
+    {
+      int n, gsize;
+
+      fprintf (dump, ";;\tUltraSPARC Launched [");
+      gsize = up->group_size;
+      for (n = 0; n < 4; n++)
+        {
+          rtx insn = up->group[n];
+
+          if (! insn)
+            continue;
+
+          gsize -= 1;
+          if (gsize != 0)
+            fprintf (dump, "%s(%d) ",
+                     ultra_code_names[up->codes[n]],
+                     INSN_UID (insn));
+          else
+            fprintf (dump, "%s(%d)",
+                     ultra_code_names[up->codes[n]],
+                     INSN_UID (insn));
+        }
+      fprintf (dump, "]\n");
+    }
+}
+
+int
+ultrasparc_adjust_cost (insn, link, dep_insn, previous, cost)
+     rtx insn;
+     rtx link;
+     rtx dep_insn;
+     rtx previous;
+     int cost;
 {
   enum attr_type insn_type, dep_type;
-  rtx pat = PATTERN(insn);
-  rtx dep_pat = PATTERN (dep_insn);
+  rtx pat = PATTERN (insn);
+  rtx dep_pat = PATTERN (dep_insn);
 
-  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
-    return cost;
+  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+    return cost;
 
-  insn_type = get_attr_type (insn);
-  dep_type = get_attr_type (dep_insn);
+  insn_type = get_attr_type (insn);
+  dep_type = get_attr_type (dep_insn);
+
+  /* Nothing issues in parallel with integer multiplies, so
+     mark as zero cost since the scheduler can not do anything
+     about it.  */
+  if (insn_type == TYPE_IMUL)
+    return 0;
 
 #define SLOW_FP(dep_type) \
 (dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
 
   switch (REG_NOTE_KIND (link))
-  {
-  case 0:
+    {
+    case 0:
       /* Data dependency; DEP_INSN writes a register that INSN reads some
-        cycles later.  */
+         cycles later.  */
+
+      if (dep_type == TYPE_CMOVE)
+        {
+          /* Instructions that read the result of conditional moves cannot
+             be in the same group or the following group.  */
+          return cost + 1;
+        }
 
       switch (insn_type)
-      {
-        /* UltraSPARC can dual issue a store and an instruction setting
-           the value stored, except for divide and square root.  */
+        {
+          /* UltraSPARC can dual issue a store and an instruction setting
+             the value stored, except for divide and square root.  */
         case TYPE_FPSTORE:
-        if (! SLOW_FP (dep_type))
-          return 0;
+          if (! SLOW_FP (dep_type))
+            return 0;
           return cost;
 
-        case TYPE_STORE:
+        case TYPE_STORE:
           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
-            return cost;
+            return cost;
 
           if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
-          /* The dependency between the two instructions is on the data
-             that is being stored.  Assume that the address of the store
-             is not also dependent.  */
-            return 0;
-          return cost;
-
-        case TYPE_LOAD:
-        case TYPE_SLOAD:
-        case TYPE_FPLOAD:
-          /* A load does not return data until at least 11 cycles after
+            /* The dependency between the two instructions is on the data
+               that is being stored.  Assume that the address of the store
+               is not also dependent.  */
+            return 0;
+          return cost;
+
+        case TYPE_LOAD:
+        case TYPE_SLOAD:
+        case TYPE_FPLOAD:
+          /* A load does not return data until at least 11 cycles after
              a store to the same location.  3 cycles are accounted for
              in the load latency; add the other 8 here.  */
           if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
-          {
+            {
               /* If the addresses are not equal this may be a false
                  dependency because pointer aliasing could not be
                  determined.  Add only 2 cycles in that case.  2 is
                  an arbitrary compromise between 8, which would cause
                  the scheduler to generate worse code elsewhere to
-                compensate for a dependency which might not really
-                exist, and 0.  */
+                 compensate for a dependency which might not really
+                 exist, and 0.  */
               if (GET_CODE (pat) != SET
                   || GET_CODE (dep_pat) != SET
                   || GET_CODE (SET_SRC (pat)) != MEM
                   || GET_CODE (SET_DEST (dep_pat)) != MEM
                   || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
                                     XEXP (SET_DEST (dep_pat), 0)))
                 return cost + 2;
@@ -6215,72 +6913,73 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
-            return cost + 8;
-          }
+              return cost + 8;
+            }
           return cost;
 
-        case TYPE_BRANCH:
+        case TYPE_BRANCH:
           /* Compare to branch latency is 0.  There is no benefit
              from separating compare and branch.  */
-          if (dep_type == TYPE_COMPARE)
-            return 0;
-          /* Floating point compare to branch latency is less than
-             compare to conditional move.  */
-          if (dep_type == TYPE_FPCMP)
-            return cost - 1;
+          if (dep_type == TYPE_COMPARE)
+            return 0;
+          /* Floating point compare to branch latency is less than
+             compare to conditional move.  */
+          if (dep_type == TYPE_FPCMP)
+            return cost - 1;
           return cost;
 
-        case TYPE_FPCMOVE:
+        case TYPE_FPCMOVE:
           /* FMOVR class instructions can not issue in the same cycle
              or the cycle after an instruction which writes any
              integer register.  Model this as cost 2 for dependent
-            instructions.  */
+             instructions.  */
           if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
                || dep_type == TYPE_BINARY)
-            && cost < 2)
+              && cost < 2)
             return 2;
           /* Otherwise check as for integer conditional moves.  */
 
-        case TYPE_CMOVE:
+        case TYPE_CMOVE:
           /* Conditional moves involving integer registers wait until
              3 cycles after loads return data.  The interlock applies
              to all loads, not just dependent loads, but that is hard
-            to model.  */
-          if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
-            return cost + 3;
+             to model.  */
+          if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
+            return cost + 3;
           return cost;
 
         default:
           break;
         }
-      break;
+      break;
 
-    case REG_DEP_ANTI:
+    case REG_DEP_ANTI:
       /* Divide and square root lock destination registers for full
          latency.  */
-      if (! SLOW_FP (dep_type))
-        return 0;
-      break;
+      if (! SLOW_FP (dep_type))
+        return 0;
+      break;
+
+    case REG_DEP_OUTPUT:
+      /* IEU and FPU instructions that have the same destination
+         register cannot be grouped together.  */
+      return cost + 1;
 
     default:
       break;
     }
 
-  /* Other costs not accounted for:
-     - Multiply should be modeled as having no latency because there is
-       nothing the scheduler can do about it.
-     - Single precision floating point loads lock the other half of
-       the even/odd register pair.
+  /* Other costs not accounted for:
+     - Single precision floating point loads lock the other half of
+       the even/odd register pair.
      - Several hazards associated with ldd/std are ignored because these
-       instructions are rarely generated for V9.
-     - A shift following an integer instruction which does not set the
-       condition codes can not issue in the same cycle.
+       instructions are rarely generated for V9.
     - The floating point pipeline can not have both a single and double
       precision operation active at the same time.  Format conversions
       and graphics instructions are given honorary double precision status.
     - call and jmpl are always the first instruction in a group.  */
 
-  return cost;
-}
+  return cost;
+}
 
 int
 sparc_issue_rate ()
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index c643897fe67..81c72fd4e83 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -2704,11 +2704,17 @@ extern struct rtx_def *legitimize_pic_address ();
 
 #define RTX_COSTS(X,CODE,OUTER_CODE) \
   case MULT: \
+    if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+      return (GET_MODE (X) == DImode ? \
+              COSTS_N_INSNS (34) : COSTS_N_INSNS (19)); \
     return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
   case DIV: \
   case UDIV: \
   case MOD: \
   case UMOD: \
+    if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+      return (GET_MODE (X) == DImode ? \
+              COSTS_N_INSNS (68) : COSTS_N_INSNS (37)); \
     return COSTS_N_INSNS (25); \
   /* Make FLOAT and FIX more expensive than CONST_DOUBLE,\
      so that cse will favor the latter.  */ \
@@ -2723,9 +2729,28 @@ extern struct rtx_def *legitimize_pic_address ();
   if (sparc_cpu == PROCESSOR_SUPERSPARC) \
     (COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \
   else if (sparc_cpu == PROCESSOR_ULTRASPARC) \
-    (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST); \
+    (COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, \
+                                     last_scheduled_insn, COST);\
   else
 
+extern void ultrasparc_sched_reorder ();
+extern void ultrasparc_sched_init ();
+extern int ultrasparc_variable_issue ();
+
+#define MD_SCHED_INIT(DUMP, SCHED_VERBOSE) \
+  if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+    ultrasparc_sched_init (DUMP, SCHED_VERBOSE)
+
+#define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY) \
+  if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+    ultrasparc_sched_reorder (DUMP, SCHED_VERBOSE, READY, N_READY)
+
+#define MD_SCHED_VARIABLE_ISSUE(DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE) \
+  if (sparc_cpu == PROCESSOR_ULTRASPARC) \
+    (CAN_ISSUE_MORE) = ultrasparc_variable_issue (INSN); \
+  else \
+    (CAN_ISSUE_MORE)--
+
 /* Conditional branches with empty delay slots have a length of two.  */
 #define ADJUST_INSN_LENGTH(INSN, LENGTH) \
   if (GET_CODE (INSN) == CALL_INSN \
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 83ade02a35e..cd23e8260c1 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -398,9 +398,9 @@
 	(eq_attr "type" "store,fpstore"))
   1 1)
 
-(define_function_unit "ieu_unnamed" 2 0
+(define_function_unit "ieuN" 2 0
   (and (eq_attr "cpu" "ultrasparc")
-    (eq_attr "type" "ialu,binary,move,unary,shift,cmove,compare,call"))
+    (eq_attr "type" "ialu,binary,move,unary,shift,compare,call,call_no_delay_slot,uncond_branch"))
   1 1)
 
 (define_function_unit "ieu0" 1 0
@@ -415,7 +415,12 @@
 
 (define_function_unit "ieu1" 1 0
   (and (eq_attr "cpu" "ultrasparc")
-    (eq_attr "type" "compare,call,uncond_branch"))
+    (eq_attr "type" "compare,call,call_no_delay_slot,uncond_branch"))
+  1 1)
+
+(define_function_unit "cti" 1 0
+  (and (eq_attr "cpu" "ultrasparc")
+    (eq_attr "type" "branch"))
   1 1)
 
 ;; Timings; throughput/latency
@@ -1416,7 +1421,7 @@
 	(match_operand:SI 1 "arith_operand" "rI")))]
   "! TARGET_LIVE_G0"
   "subx\\t%%g0, %1, %0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*neg_sltu_plus_x"
@@ -1425,7 +1430,7 @@
 	(match_operand:SI 1 "arith_operand" "rI"))))]
   "! TARGET_LIVE_G0"
   "subx\\t%%g0, %1, %0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*sgeu_insn"
@@ -1454,7 +1459,7 @@
 	(match_operand:SI 1 "arith_operand" "rI")))]
   "! TARGET_LIVE_G0"
  "addx\\t%%g0, %1, %0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*sltu_plus_x_plus_y"
@@ -1464,7 +1469,7 @@
 	 (match_operand:SI 2 "arith_operand" "rI"))))]
   ""
   "addx\\t%1, %2, %0"
-  [(set_attr "type" "binary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*x_minus_sltu"
@@ -1473,7 +1478,7 @@
 		  (ltu:SI (reg:CC 100) (const_int 0))))]
   ""
   "subx\\t%1, 0, %0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 ;; ??? Combine should canonicalize these next two to the same pattern.
@@ -1484,7 +1489,7 @@
 		 (ltu:SI (reg:CC 100) (const_int 0))))]
   ""
   "subx\\t%r1, %2, %0"
-  [(set_attr "type" "binary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*x_minus_sltu_plus_y"
@@ -1494,7 +1499,7 @@
 		  (match_operand:SI 2 "arith_operand" "rI"))))]
   ""
   "subx\\t%r1, %2, %0"
-  [(set_attr "type" "binary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*sgeu_plus_x"
@@ -1503,7 +1508,7 @@
 		 (match_operand:SI 1 "register_operand" "r")))]
   ""
   "subx\\t%1, -1, %0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_insn "*x_minus_sgeu"
@@ -1512,7 +1517,7 @@
 		  (geu:SI (reg:CC 100) (const_int 0))))]
   ""
   "addx\\t%1, -1, %0"
-  [(set_attr "type" "unary")
+  [(set_attr "type" "misc")
    (set_attr "length" "1")])
 
 (define_split
@@ -2167,7 +2172,8 @@
 		 (unspec:SI [(match_operand:SI 2 "immediate_operand" "in")] 0)))]
   "flag_pic"
   "or\\t%1, %%lo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "movsi_high_pic"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -2212,7 +2218,8 @@
 			    (match_operand:SI 3 "" "")] 5)))]
   "flag_pic"
   "or\\t%1, %%lo(%a3-(%a2-.)), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_expand "movdi"
   [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "")
@@ -2394,7 +2401,8 @@
 		 (unspec:DI [(match_operand:DI 2 "immediate_operand" "in")] 0)))]
   "TARGET_ARCH64 && flag_pic"
   "or\\t%1, %%lo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "*pic_sethi_di"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2426,7 +2434,8 @@
 		 (match_operand:DI 2 "symbolic_operand" "")))]
   "TARGET_CM_MEDLOW"
   "or\\t%1, %%lo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "seth44"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2451,7 +2460,8 @@
 		 (match_operand:DI 2 "symbolic_operand" "")))]
   "TARGET_CM_MEDMID"
   "or\\t%1, %%l44(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "sethh"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2475,7 +2485,8 @@
 		 (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")] 18)))]
   "TARGET_CM_MEDANY"
   "or\\t%1, %%hm(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "setlo"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2483,7 +2494,8 @@
 		 (match_operand:DI 2 "symbolic_operand" "")))]
   "TARGET_CM_MEDANY"
   "or\\t%1, %%lo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "embmedany_sethi"
  [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2499,7 +2511,8 @@
 		 (match_operand:DI 2 "data_segment_operand" "")))]
   "TARGET_CM_EMBMEDANY"
   "add\\t%1, %%lo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "embmedany_brsum"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2530,7 +2543,8 @@
 		 (unspec:DI [(match_operand:DI 2 "text_segment_operand" "")] 15)))]
   "TARGET_CM_EMBMEDANY"
   "or\\t%1, %%ulo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
 
 (define_insn "embmedany_textlo"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2538,7 +2552,8 @@
 		 (match_operand:DI 2 "text_segment_operand" "")))]
   "TARGET_CM_EMBMEDANY"
   "or\\t%1, %%lo(%a2), %0"
-  [(set_attr "length" "1")])
+  [(set_attr "type" "ialu")
+   (set_attr "length" "1")])
"1")]) ;; Now some patterns to help reload out a bit. (define_expand "reload_indi" @@ -3797,7 +3812,7 @@ "@ srl\\t%1, 0, %0 lduw\\t%1, %0" - [(set_attr "type" "unary,load") + [(set_attr "type" "shift,load") (set_attr "length" "1")]) (define_insn "*zero_extendsidi2_insn_sp32" @@ -4090,7 +4105,7 @@ (sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))] "TARGET_ARCH64" "ldsh\\t%1, %0" - [(set_attr "type" "load") + [(set_attr "type" "sload") (set_attr "length" "1")]) (define_expand "extendsidi2" @@ -4106,7 +4121,7 @@ "@ sra\\t%1, 0, %0 ldsw\\t%1, %0" - [(set_attr "type" "unary,sload") + [(set_attr "type" "shift,sload") (set_attr "length" "1")]) ;; Special pattern for optimizing bit-field compares. This is needed @@ -4469,7 +4484,7 @@ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))] "TARGET_ARCH64" "addx\\t%r1, %2, %0" - [(set_attr "type" "unary") + [(set_attr "type" "misc") (set_attr "length" "1")]) (define_insn "subx" @@ -4479,7 +4494,7 @@ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))] "" "subx\\t%r1, %2, %0" - [(set_attr "type" "unary") + [(set_attr "type" "misc") (set_attr "length" "1")]) (define_insn "*subx_extend_sp64" @@ -4489,7 +4504,7 @@ (ltu:SI (reg:CC_NOOV 100) (const_int 0)))))] "TARGET_ARCH64" "subx\\t%r1, %2, %0" - [(set_attr "type" "unary") + [(set_attr "type" "misc") (set_attr "length" "1")]) (define_insn "*subx_extend" @@ -4547,7 +4562,9 @@ (plus:DI (match_operand:DI 1 "arith_double_operand" "%r") (match_operand:DI 2 "arith_double_operand" "rHI")))] "TARGET_ARCH64" - "add\\t%1, %2, %0") + "add\\t%1, %2, %0" + [(set_attr "type" "binary") + (set_attr "length" "1")]) (define_insn "addsi3" [(set (match_operand:SI 0 "register_operand" "=r,d") @@ -7016,7 +7033,9 @@ (define_insn "nop" [(const_int 0)] "" - "nop") + "nop" + [(set_attr "type" "ialu") + (set_attr "length" "1")]) (define_expand "indirect_jump" [(set (pc) (match_operand 0 "address_operand" "p"))] @@ -7096,7 +7115,8 @@ [(unspec_volatile [(const_int 0)] 1)] "" "* return TARGET_V9 ? \"flushw\" : \"ta\\t3\";" - [(set_attr "type" "misc")]) + [(set_attr "type" "misc") + (set_attr "length" "1")]) (define_insn "goto_handler_and_restore" [(unspec_volatile [(match_operand:SI 0 "register_operand" "=r")] 2)] @@ -7162,7 +7182,8 @@ [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 4)] "" "* return TARGET_V9 ? \"flush\\t%f0\" : \"iflush\\t%f0\";" - [(set_attr "type" "misc")]) + [(set_attr "type" "misc") + (set_attr "length" "1")]) ;; find first set. @@ -7527,7 +7548,8 @@ [(trap_if (const_int 1) (const_int 5))] "" "ta\\t5" - [(set_attr "type" "misc")]) + [(set_attr "type" "misc") + (set_attr "length" "1")]) (define_expand "conditional_trap" [(trap_if (match_operator 0 "noov_compare_op" @@ -7543,12 +7565,14 @@ (match_operand:SI 1 "arith_operand" "rM"))] "" "t%C0\\t%1" - [(set_attr "type" "misc")]) + [(set_attr "type" "misc") + (set_attr "length" "1")]) (define_insn "" [(trap_if (match_operator 0 "noov_compare_op" [(reg:CCX 100) (const_int 0)]) (match_operand:SI 1 "arith_operand" "rM"))] "TARGET_V9" "t%C0\\t%%xcc, %1" - [(set_attr "type" "misc")]) + [(set_attr "type" "misc") + (set_attr "length" "1")])