ADDRESS_SYMBOLIC
};
+/* Classifies an unconditional branch of interest for the P6600. */
+
+enum mips_ucbranch_type
+{
+ /* May not even be a branch. */
+ UC_UNDEFINED,
+ UC_BALC,
+ UC_OTHER
+};
+
/* Macros to create an enumeration identifier for a function prototype. */
#define MIPS_FTYPE_NAME1(A, B) MIPS_##A##_FTYPE_##B
#define MIPS_FTYPE_NAME2(A, B, C) MIPS_##A##_FTYPE_##B##_##C
COSTS_N_INSNS (36), /* int_div_di */
2, /* branch_cost */
4 /* memory_latency */
+ },
+ { /* P6600 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (5), /* fp_mult_sf */
+ COSTS_N_INSNS (5), /* fp_mult_df */
+ COSTS_N_INSNS (17), /* fp_div_sf */
+ COSTS_N_INSNS (17), /* fp_div_df */
+ COSTS_N_INSNS (5), /* int_mult_si */
+ COSTS_N_INSNS (5), /* int_mult_di */
+ COSTS_N_INSNS (8), /* int_div_si */
+ COSTS_N_INSNS (8), /* int_div_di */
+ 2, /* branch_cost */
+ 4 /* memory_latency */
}
};
\f
case PROCESSOR_LOONGSON_2F:
case PROCESSOR_LOONGSON_3A:
case PROCESSOR_P5600:
+ case PROCESSOR_P6600:
return 4;
case PROCESSOR_XLP:
if (TUNE_OCTEON)
return 2;
- if (TUNE_P5600 || TUNE_I6400)
+ if (TUNE_P5600 || TUNE_P6600 || TUNE_I6400)
return 4;
return 0;
return false;
}
+/* Subroutine of mips_avoid_hazard. We classify unconditional branches
+ of interest for the P6600 for performance reasons. We're interested
+ in differentiating BALC from JIC, JIALC and BC. */
+
+static enum mips_ucbranch_type
+mips_classify_branch_p6600 (rtx_insn *insn)
+{
+ /* We ignore sequences here as they represent a filled delay slot. */
+ if (!insn
+ || !USEFUL_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == SEQUENCE)
+ return UC_UNDEFINED;
+
+ if (get_attr_jal (insn) == JAL_INDIRECT /* JIC and JIALC. */
+ || get_attr_type (insn) == TYPE_JUMP) /* BC. */
+ return UC_OTHER;
+
+ if (CALL_P (insn) && get_attr_jal (insn) == JAL_DIRECT)
+ return UC_BALC;
+
+ return UC_UNDEFINED;
+}
+
/* Subroutine of mips_reorg_process_insns. If there is a hazard between
INSN and a previous instruction, avoid it by inserting nops after
instruction AFTER.
&& GET_CODE (pattern) != ASM_INPUT
&& asm_noperands (pattern) < 0)
nops = 1;
+ /* The P6600's branch predictor can handle static sequences of back-to-back
+ branches in the following cases:
+
+ (1) BALC followed by any conditional compact branch
+ (2) BALC followed by BALC
+
+ Any other combinations of compact branches will incur performance
+ penalty. Inserting a no-op only costs space as the dispatch unit will
+ disregard the nop. */
+ else if (TUNE_P6600 && TARGET_CB_MAYBE && !optimize_size
+ && ((mips_classify_branch_p6600 (after) == UC_BALC
+ && mips_classify_branch_p6600 (insn) == UC_OTHER)
+ || (mips_classify_branch_p6600 (insn) == UC_BALC
+ && mips_classify_branch_p6600 (after) == UC_OTHER)))
+ nops = 1;
else
nops = 0;
/* Insert the nops between this instruction and the previous one.
Each new nop takes us further from the last hilo hazard. */
*hilo_delay += nops;
+
+ /* Move to the next real instruction if we are inserting a NOP and this
+ instruction is a call with debug information. The reason being that
+ we can't separate the call from the debug info. */
+ rtx_insn *real_after = after;
+ if (real_after && nops && CALL_P (real_after))
+ while (real_after
+ && (NOTE_P (NEXT_INSN (real_after))
+ || BARRIER_P (NEXT_INSN (real_after))))
+ real_after = NEXT_INSN (real_after);
+
while (nops-- > 0)
- emit_insn_after (gen_hazard_nop (), after);
+ emit_insn_after (gen_hazard_nop (), real_after);
/* Set up the state for the next instruction. */
*hilo_delay += ninsns;
switch (get_attr_hazard (insn))
{
case HAZARD_NONE:
+ /* For the P6600, flag some unconditional branches as having a
+ pseudo-forbidden slot. This will cause additional nop insertion
+ or SEQUENCE breaking as required. This is for performance
+ reasons not correctness. */
+ if (TUNE_P6600
+ && !optimize_size
+ && TARGET_CB_MAYBE
+ && mips_classify_branch_p6600 (insn) == UC_OTHER)
+ *fs_delay = true;
break;
case HAZARD_FORBIDDEN_SLOT:
and the next useful instruction is a SEQUENCE of a jump
and a non-nop instruction in the delay slot, remove the
sequence and replace it with the delay slot instruction
- then the jump to clear the forbidden slot hazard. */
-
- if (fs_delay)
+ then the jump to clear the forbidden slot hazard.
+
+ For the P6600, this optimisation solves the performance
+ penalty associated with BALC followed by a delay slot
+ branch. We do not set fs_delay as we do not want
+ the full logic of a forbidden slot; the penalty exists
+ only against branches not the full class of forbidden
+ slot instructions. */
+
+ if (fs_delay || (TUNE_P6600
+ && TARGET_CB_MAYBE
+ && mips_classify_branch_p6600 (insn)
+ == UC_BALC))
{
/* Search onwards from the current position looking for
a SEQUENCE. We are looking for pipeline hazards here
--- /dev/null
+;; DFA-based pipeline description for P6600.
+;;
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "p6600_agen_alq_pipe, p6600_mdu_pipe, p6600_fpu_pipe")
+
+;; The address generation queue (AGQ) has AL2, CTISTD and LDSTA pipes
+(define_cpu_unit "p6600_agq, p6600_al2, p6600_ctistd, p6600_lsu"
+ "p6600_agen_alq_pipe")
+
+(define_cpu_unit "p6600_gpmul, p6600_gpdiv" "p6600_mdu_pipe")
+
+;; The arithmetic-logic-unit queue (ALQ) has ALU pipe
+(define_cpu_unit "p6600_alq, p6600_alu" "p6600_agen_alq_pipe")
+
+;; The floating-point-unit queue (FPQ) has short and long pipes
+(define_cpu_unit "p6600_fpu_short, p6600_fpu_long" "p6600_fpu_pipe")
+
+;; Short FPU pipeline.
+(define_cpu_unit "p6600_fpu_intadd, p6600_fpu_cmp, p6600_fpu_float,
+ p6600_fpu_logic_a, p6600_fpu_logic_b, p6600_fpu_div,
+ p6600_fpu_store" "p6600_fpu_pipe")
+
+;; Long FPU pipeline.
+(define_cpu_unit "p6600_fpu_logic, p6600_fpu_float_a, p6600_fpu_float_b,
+ p6600_fpu_float_c, p6600_fpu_float_d" "p6600_fpu_pipe")
+(define_cpu_unit "p6600_fpu_mult, p6600_fpu_fdiv, p6600_fpu_apu" "p6600_fpu_pipe")
+
+(define_reservation "p6600_agq_al2" "p6600_agq, p6600_al2")
+(define_reservation "p6600_agq_ctistd" "p6600_agq, p6600_ctistd")
+(define_reservation "p6600_agq_lsu" "p6600_agq, p6600_lsu")
+(define_reservation "p6600_alq_alu" "p6600_alq, p6600_alu")
+
+;;
+;; FPU-MSA pipe
+;;
+
+;; Arithmetic
+;; add, hadd, sub, hsub, average, min, max, compare
+(define_insn_reservation "p6600_msa_short_int_add" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_int_arith"))
+ "p6600_fpu_short, p6600_fpu_intadd")
+
+;; Bitwise Instructions
+;; and, or, xor, bit-clear, leading-bits-count, shift, shuffle
+(define_insn_reservation "p6600_msa_short_logic" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_shift,simd_bit,simd_splat,simd_fill,simd_shf,
+ simd_permute,simd_logic"))
+ "p6600_fpu_short, p6600_fpu_logic_a")
+
+;; move.v
+(define_insn_reservation "p6600_msa_short_logic_move_v" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_move"))
+ "p6600_fpu_short, p6600_fpu_logic_a")
+
+;; Float compare
+(define_insn_reservation "p6600_msa_short_cmp" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fcmp"))
+ "p6600_fpu_short, p6600_fpu_cmp")
+
+;; Float exp2, min, max
+(define_insn_reservation "p6600_msa_short_float2" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fexp2,simd_fminmax"))
+ "p6600_fpu_short, p6600_fpu_float")
+
+;; Vector sat
+(define_insn_reservation "p6600_msa_short_logic3" 3
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_sat,simd_pcnt"))
+ "p6600_fpu_short, p6600_fpu_logic_a, p6600_fpu_logic_b")
+
+;; Vector copy, bz, bnz
+(define_insn_reservation "p6600_msa_short_store4" 4
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_copy,simd_branch,simd_cmsa"))
+ "p6600_fpu_short, p6600_fpu_store")
+
+;; Vector load
+(define_insn_reservation "p6600_msa_load" 8
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_load"))
+ "p6600_agq_lsu")
+
+;; Vector store
+(define_insn_reservation "p6600_msa_short_store" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_store"))
+ "p6600_agq_lsu")
+
+;; binsl, binsr, insert, vshf, sld
+(define_insn_reservation "p6600_msa_long_logic" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_bitins,simd_bitmov,simd_insert,simd_sld"))
+ "p6600_fpu_long, p6600_fpu_logic")
+
+;; Float fclass, flog2
+(define_insn_reservation "p6600_msa_long_float2" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fclass,simd_flog2"))
+ "p6600_fpu_long, p6600_fpu_float_a")
+
+;; fadd, fsub
+(define_insn_reservation "p6600_msa_long_float4" 4
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fadd,simd_fcvt"))
+ "p6600_fpu_long, p6600_fpu_float_a, p6600_fpu_float_b")
+
+;; fmul
+(define_insn_reservation "p6600_msa_long_float5" 5
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fmul"))
+ "p6600_fpu_long, p6600_fpu_float_a, p6600_fpu_float_b, p6600_fpu_float_c")
+
+;; fmadd, fmsub
+(define_insn_reservation "p6600_msa_long_float8" 8
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fmadd"))
+ "p6600_fpu_long, p6600_fpu_float_a,
+ p6600_fpu_float_b, p6600_fpu_float_c, p6600_fpu_float_d")
+
+;; Vector mul, dotp, madd, msub
+(define_insn_reservation "p6600_msa_long_mult" 5
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_mul"))
+ "p6600_fpu_long, p6600_fpu_mult")
+
+;; fdiv, fmod (semi-pipelined)
+(define_insn_reservation "p6600_msa_long_fdiv" 10
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_fdiv"))
+ "p6600_fpu_long, nothing, nothing, p6600_fpu_fdiv*8")
+
+;; div, mod (non-pipelined)
+(define_insn_reservation "p6600_msa_long_div" 10
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "simd_div"))
+ "p6600_fpu_long, p6600_fpu_div*9, p6600_fpu_div + p6600_fpu_logic_a")
+
+;;
+;; FPU pipe
+;;
+
+;; fadd, fsub
+(define_insn_reservation "p6600_fpu_fadd" 4
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fadd"))
+ "p6600_fpu_long, p6600_fpu_apu")
+
+;; fabs, fneg, fcmp
+(define_insn_reservation "p6600_fpu_fabs" 2
+ (and (eq_attr "cpu" "p6600")
+ (ior (eq_attr "type" "fabs,fneg,fcmp,fmove")
+ (and (eq_attr "type" "condmove")
+ (eq_attr "mode" "SF,DF"))))
+ "p6600_fpu_short, p6600_fpu_apu")
+
+;; fload
+(define_insn_reservation "p6600_fpu_fload" 8
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fpload,fpidxload"))
+ "p6600_agq_lsu")
+
+;; fstore
+(define_insn_reservation "p6600_fpu_fstore" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fpstore,fpidxstore"))
+ "p6600_agq_lsu")
+
+;; fmadd
+(define_insn_reservation "p6600_fpu_fmadd" 8
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fmadd"))
+ "p6600_fpu_long, p6600_fpu_apu")
+
+;; fmul
+(define_insn_reservation "p6600_fpu_fmul" 5
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fmul"))
+ "p6600_fpu_long, p6600_fpu_apu")
+
+;; fdiv, fsqrt
+(define_insn_reservation "p6600_fpu_div" 17
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt"))
+ "p6600_fpu_long, p6600_fpu_apu*17")
+
+;; fcvt
+(define_insn_reservation "p6600_fpu_fcvt" 4
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "fcvt"))
+ "p6600_fpu_long, p6600_fpu_apu")
+
+;; mtc
+(define_insn_reservation "p6600_fpu_fmtc" 7
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "mtc"))
+ "p6600_agq_lsu")
+
+;; mfc
+(define_insn_reservation "p6600_fpu_fmfc" 7
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "mfc"))
+ "p6600_agq_lsu")
+
+;;
+;; Integer pipe
+;;
+
+;; and
+(define_insn_reservation "p6600_int_and" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "move_type" "logical"))
+ "p6600_alq_alu")
+
+;; lui
+(define_insn_reservation "p6600_int_lui" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "move_type" "const"))
+ "p6600_alq_alu")
+
+;; Load lb, lbu, lh, lhu, lq, lw, lw_i2f, lwxs
+(define_insn_reservation "p6600_int_load" 4
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "load"))
+ "p6600_agq_lsu")
+
+;; store
+(define_insn_reservation "p6600_int_store" 3
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "store"))
+ "p6600_agq_lsu")
+
+;; andi, sll, srl, seb, seh
+(define_insn_reservation "p6600_int_arith_1" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "move_type" "andi,sll0,signext"))
+ "p6600_alq_alu | p6600_agq_al2")
+
+;; addi, addiu, ori, xori, add, addu
+(define_insn_reservation "p6600_int_arith_2" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "alu_type" "add,or,xor"))
+ "p6600_alq_alu | p6600_agq_al2")
+
+;; nor, sub
+(define_insn_reservation "p6600_int_arith_3" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "alu_type" "and,not,nor,sub"))
+ "p6600_alq_alu")
+
+;; srl, sra, rotr, slt, sllv, srlv
+(define_insn_reservation "p6600_int_arith_4" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "shift,slt,move"))
+ "p6600_alq_alu | p6600_agq_al2")
+
+;; nop
+(define_insn_reservation "p6600_int_nop" 0
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "nop"))
+ "p6600_alq_alu | p6600_agq_al2")
+
+;; clo, clz
+(define_insn_reservation "p6600_int_countbits" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "clz"))
+ "p6600_agq_al2")
+
+;; Conditional moves
+(define_insn_reservation "p6600_int_condmove" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "condmove"))
+ "p6600_agq_al2")
+
+;; mfhi/lo
+(define_insn_reservation "p6600_dsp_mfhilo" 5
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "mfhi,mflo"))
+ "p6600_agq_lsu")
+
+;; mthi/lo
+(define_insn_reservation "p6600_dsp_mthilo" 5
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "mthi,mtlo"))
+ "p6600_agq_lsu")
+
+;; mul, mulu, muh, muhu
+(define_insn_reservation "p6600_dsp_mult" 4
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "imul3,imul,imul3nc"))
+ "p6600_gpmul")
+
+;; branch and jump
+(define_insn_reservation "p6600_int_branch" 1
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "branch,jump"))
+ "p6600_agq_ctistd")
+
+;; prefetch
+(define_insn_reservation "p6600_int_prefetch" 0
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "prefetch,prefetchx"))
+ "p6600_agq_lsu")
+
+;; divide
+(define_insn_reservation "p6600_int_div" 8
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "idiv,idiv3"))
+ "p6600_gpdiv*5")
+
+;; arith
+(define_insn_reservation "p6600_int_arith_5" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "type" "arith"))
+ "p6600_agq_al2")
+
+;; call
+(define_insn_reservation "p6600_int_call" 2
+ (and (eq_attr "cpu" "p6600")
+ (eq_attr "jal" "indirect,direct"))
+ "p6600_agq_ctistd")