From 7a3446ec7cdc75bedc1e0b47daea93146c5f8a78 Mon Sep 17 00:00:00 2001 From: Joshua Kinard Date: Mon, 6 Oct 2008 19:41:10 +0000 Subject: [PATCH] invoke.texi: List r1x000 family under the -march MIPS option. gcc/ 2008-10-06 Joshua Kinard * doc/invoke.texi: List r1x000 family under the -march MIPS option. * config/mips/mips.h (PROCESSOR_R10000): New processor_type. * config/mips/mips.c (mips_cpu_info_table): Add r10000, r12000, r14000 and r16000. (mips_rtx_cost_data): Add a PROCESSOR_R10000 entry. (mips_issue_rate): Handle PROCESSOR_R10000. * config/mips/mips.md (cpu): Add r10000. Include 10000.md. * config/mips/10000.md: New file. From-SVN: r140913 --- gcc/ChangeLog | 12 ++ gcc/config/mips/10000.md | 253 +++++++++++++++++++++++++++++++++++++++ gcc/config/mips/mips.c | 22 +++- gcc/config/mips/mips.h | 1 + gcc/config/mips/mips.md | 3 +- gcc/doc/invoke.texi | 1 + 6 files changed, 290 insertions(+), 2 deletions(-) create mode 100644 gcc/config/mips/10000.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cbafd381198..b983fe556dc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2008-10-06 Joshua Kinard + + * doc/invoke.texi: List r1x000 family under the -march MIPS option. + * config/mips/mips.h (PROCESSOR_R10000): New processor_type. + * config/mips/mips.c (mips_cpu_info_table): Add r10000, r12000, + r14000 and r16000. + (mips_rtx_cost_data): Add a PROCESSOR_R10000 entry. + (mips_issue_rate): Handle PROCESSOR_R10000. + * config/mips/mips.md (cpu): Add r10000. + Include 10000.md. + * config/mips/10000.md: New file. + 2008-10-06 Richard Sandiford * config/rs6000/rs6000-protos.h (rs6000_find_base_term): Declare. diff --git a/gcc/config/mips/10000.md b/gcc/config/mips/10000.md new file mode 100644 index 00000000000..ad21e9e936e --- /dev/null +++ b/gcc/config/mips/10000.md @@ -0,0 +1,253 @@ +;; DFA-based pipeline description for the VR1x000. +;; Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; R12K/R14K/R16K are derivatives of R10K, thus copy its description +;; until specific tuning for each is added. + +;; R10000 has an int queue, fp queue, address queue. +;; The int queue feeds ALU1 and ALU2. +;; The fp queue feeds the fp-adder and fp-multiplier. +;; The addr queue feeds the Load/Store unit. +;; +;; However, we define the fp-adder and fp-multiplier as +;; separate automatons, because the fp-multiplier is +;; divided into fp-multiplier, fp-division, and +;; fp-squareroot units, all of which share the same +;; issue and completion logic, yet can operate in +;; parallel. +;; +;; This is based on the model described in the R10K Manual +;; and it helps to reduce the size of the automata. +(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr, + r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt") + +(define_cpu_unit "r10k_alu1" "r10k_a_int") +(define_cpu_unit "r10k_alu2" "r10k_a_int") +(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder") +(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy") +(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv") +(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt") +(define_cpu_unit "r10k_loadstore" "r10k_a_addr") + + +;; R10k Loads and Stores. 
+(define_insn_reservation "r10k_load" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "load,prefetch,prefetchx")) + "r10k_loadstore") + +(define_insn_reservation "r10k_store" 0 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "store,fpstore,fpidxstore")) + "r10k_loadstore") + +(define_insn_reservation "r10k_fpload" 3 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fpload,fpidxload")) + "r10k_loadstore") + + +;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2. +;; Miscellaneous arith goes here too (this is a guess). +(define_insn_reservation "r10k_arith" 1 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical")) + "r10k_alu1 | r10k_alu2") + +;; We treat mfhilo differently, because we need to know when +;; it's HI and when it's LO. +(define_insn_reservation "r10k_mfhi" 1 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "mfhilo") + (not (match_operand 1 "lo_operand")))) + "r10k_alu1 | r10k_alu2") + +(define_insn_reservation "r10k_mflo" 1 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "mfhilo") + (match_operand 1 "lo_operand"))) + "r10k_alu1 | r10k_alu2") + + +;; ALU1 handles shifts, branch eval, and condmove. +;; +;; Brancher is separate, but part of ALU1, but can only +;; do one branch per cycle (is this even implementable?). +;; +;; Unsure if the brancher handles jumps and calls as well, but since +;; they're related, we'll add them here for now. +(define_insn_reservation "r10k_brancher" 1 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "shift,branch,jump,call")) + "r10k_alu1") + +(define_insn_reservation "r10k_int_cmove" 1 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SI,DI"))) + "r10k_alu1") + + +;; Coprocessor Moves. +;; mtc1/dmtc1 are handled by ALU1. +;; mfc1/dmfc1 are handled by the fp-multiplier. 
+(define_insn_reservation "r10k_mt_xfer" 3 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "mtc")) + "r10k_alu1") + +(define_insn_reservation "r10k_mf_xfer" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "mfc")) + "r10k_fpmpy") + + +;; Only ALU2 does int multiplications and divisions. +;; +;; According to the Vr10000 series user manual, +;; integer mult and div insns can be issued one +;; cycle earlier if using register Lo. We model +;; this by using the Lo value by default, as it +;; is the more common value, and use a bypass +;; for the Hi value when needed. +;; +;; Also of note, There are different latencies +;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7). +;; However, gcc does not have separate types +;; for these insns. Thus to strike a balance, +;; we use the Hi latency value for imul +;; operations until the imul type can be split. +(define_insn_reservation "r10k_imul_single" 6 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "SI"))) + "r10k_alu2 * 6") + +(define_insn_reservation "r10k_imul_double" 10 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "imul,imul3") + (eq_attr "mode" "DI"))) + "r10k_alu2 * 10") + +;; Divides keep ALU2 busy. +(define_insn_reservation "r10k_idiv_single" 34 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "r10k_alu2 * 35") + +(define_insn_reservation "r10k_idiv_double" 66 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "r10k_alu2 * 67") + +(define_bypass 35 "r10k_idiv_single" "r10k_mfhi") +(define_bypass 67 "r10k_idiv_double" "r10k_mfhi") + + +;; Floating point add/sub, mul, abs value, neg, comp, & moves. 
+(define_insn_reservation "r10k_fp_miscadd" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fadd,fabs,fneg,fcmp")) + "r10k_fpadd") + +(define_insn_reservation "r10k_fp_miscmul" 2 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fmul,fmove")) + "r10k_fpmpy") + +(define_insn_reservation "r10k_fp_cmove" 2 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "condmove") + (eq_attr "mode" "SF,DF"))) + "r10k_fpmpy") + + +;; The fcvt.s.[wl] insn has latency 4, repeat 2. +;; All other fcvt insns have latency 2, repeat 1. +(define_insn_reservation "r10k_fcvt_single" 4 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "I2S"))) + "r10k_fpadd * 2") + +(define_insn_reservation "r10k_fcvt_other" 2 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fcvt") + (eq_attr "cnv_mode" "!I2S"))) + "r10k_fpadd") + + +;; Run the fmadd insn through fp-adder first, then fp-multiplier. +;; +;; The latency for fmadd is 2 cycles if the result is used +;; by another fmadd instruction. +(define_insn_reservation "r10k_fmadd" 4 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "fmadd")) + "r10k_fpadd, r10k_fpmpy") + +(define_bypass 2 "r10k_fmadd" "r10k_fmadd") + + +;; Floating point Divisions & square roots. 
+(define_insn_reservation "r10k_fdiv_single" 12 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "SF"))) + "r10k_fpdiv * 14") + +(define_insn_reservation "r10k_fdiv_double" 19 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fdiv,frdiv") + (eq_attr "mode" "DF"))) + "r10k_fpdiv * 21") + +(define_insn_reservation "r10k_fsqrt_single" 18 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "SF"))) + "r10k_fpsqrt * 20") + +(define_insn_reservation "r10k_fsqrt_double" 33 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "fsqrt") + (eq_attr "mode" "DF"))) + "r10k_fpsqrt * 35") + +(define_insn_reservation "r10k_frsqrt_single" 30 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "SF"))) + "r10k_fpsqrt * 20") + +(define_insn_reservation "r10k_frsqrt_double" 52 + (and (eq_attr "cpu" "r10000") + (and (eq_attr "type" "frsqrt") + (eq_attr "mode" "DF"))) + "r10k_fpsqrt * 35") + + +;; Handle unknown/multi insns here (this is a guess). +(define_insn_reservation "r10k_unknown" 1 + (and (eq_attr "cpu" "r10000") + (eq_attr "type" "unknown,multi")) + "r10k_alu1 + r10k_alu2") diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 20532ba3de2..597f1edd567 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -607,6 +607,10 @@ static const struct mips_cpu_info mips_cpu_info_table[] = { /* MIPS IV processors. 
*/ { "r8000", PROCESSOR_R8000, 4, 0 }, + { "r10000", PROCESSOR_R10000, 4, 0 }, + { "r12000", PROCESSOR_R10000, 4, 0 }, + { "r14000", PROCESSOR_R10000, 4, 0 }, + { "r16000", PROCESSOR_R10000, 4, 0 }, { "vr5000", PROCESSOR_R5000, 4, 0 }, { "vr5400", PROCESSOR_R5400, 4, 0 }, { "vr5500", PROCESSOR_R5500, 4, PTF_AVOID_BRANCHLIKELY }, @@ -1015,6 +1019,19 @@ static const struct mips_rtx_cost_data mips_rtx_cost_data[PROCESSOR_MAX] = { 1, /* branch_cost */ 4 /* memory_latency */ }, + { /* R1x000 */ + COSTS_N_INSNS (2), /* fp_add */ + COSTS_N_INSNS (2), /* fp_mult_sf */ + COSTS_N_INSNS (2), /* fp_mult_df */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (5), /* int_mult_si */ + COSTS_N_INSNS (9), /* int_mult_di */ + COSTS_N_INSNS (34), /* int_div_si */ + COSTS_N_INSNS (66), /* int_div_di */ + 1, /* branch_cost */ + 4 /* memory_latency */ + }, { /* SB1 */ /* These costs are the same as the SB-1A below. */ COSTS_N_INSNS (4), /* fp_add */ @@ -10369,7 +10386,10 @@ mips_issue_rate (void) but in reality only a maximum of 3 insns can be issued as floating-point loads and stores also require a slot in the AGEN pipe. */ - return 4; + case PROCESSOR_R10000: + /* All R10K Processors are quad-issue (being the first MIPS + processors to support this feature). */ + return 4; case PROCESSOR_20KC: case PROCESSOR_R4130: diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index dc390dfe745..b0f39dc9a57 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -67,6 +67,7 @@ enum processor_type { PROCESSOR_R7000, PROCESSOR_R8000, PROCESSOR_R9000, + PROCESSOR_R10000, PROCESSOR_SB1, PROCESSOR_SB1A, PROCESSOR_SR71000, diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index f3fdaeaf45b..6ae6c0bb78f 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -560,7 +560,7 @@ ;; Attribute describing the processor. This attribute must match exactly ;; with the processor_type enumeration in mips.h. 
(define_attr "cpu" - "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,octeon,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000,xlr" + "r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,octeon,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,r10000,sb1,sb1a,sr71000,xlr" (const (symbol_ref "mips_tune"))) ;; The type of hardware hazard associated with this instruction. @@ -935,6 +935,7 @@ (include "6000.md") (include "7000.md") (include "9000.md") +(include "10000.md") (include "loongson2ef.md") (include "octeon.md") (include "sb1.md") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 62ae31fda46..49b5e5017b3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12231,6 +12231,7 @@ The processor names are: @samp{r2000}, @samp{r3000}, @samp{r3900}, @samp{r4000}, @samp{r4400}, @samp{r4600}, @samp{r4650}, @samp{r6000}, @samp{r8000}, @samp{rm7000}, @samp{rm9000}, +@samp{r10000}, @samp{r12000}, @samp{r14000}, @samp{r16000}, @samp{sb1}, @samp{sr71000}, @samp{vr4100}, @samp{vr4111}, @samp{vr4120}, @samp{vr4130}, @samp{vr4300}, -- 2.30.2