equivalent to seeing no -g option at all. */
static int mips_debug = 0;
-/* The maximum number of NOPs needed to satisfy a hardware hazard
- or processor errata. */
-#define MAX_NOPS 2
+/* The maximum number of NOPs needed to avoid the VR4130 mflo/mfhi errata.
+   nops_for_vr4130 also scans this many entries of the instruction
+   history. */
+#define MAX_VR4130_NOPS 4
+
+/* The maximum number of NOPs needed to fill delay slots. */
+#define MAX_DELAY_NOPS 2
+
+/* The maximum number of NOPs needed for any purpose.  This must be at
+   least as large as both MAX_VR4130_NOPS and MAX_DELAY_NOPS, since the
+   instruction history holds MAX_NOPS entries. */
+#define MAX_NOPS 4
/* A list of previous instructions, with index 0 being the most recent.
We need to look back MAX_NOPS instructions when filling delay slots
/* True if -mfix-vr4120 is in force. */
static int mips_fix_vr4120;
+/* ...likewise -mfix-vr4130.  Set by -mfix-vr4130 and cleared by
+   -mno-fix-vr4130; when nonzero, the VR4130 mflo/mfhi workaround is
+   included when counting the nops needed before an instruction. */
+static int mips_fix_vr4130;
+
/* We don't relax branches by default, since this causes us to expand
`la .l2 - .l1' if there's a branch between .l1 and .l2, because we
fail to compute the offset before expanding the macro to the most
return 0;
}
+/* Work out how many nops must be inserted before INSN to avoid the
+   VR4130 mflo/mfhi errata, given that INSN would directly follow the
+   MAX_VR4130_NOPS instructions recorded in HISTORY, where HISTORY[0]
+   is the most recent instruction.  INSN may be null, in which case
+   nothing is assumed about the registers that the next instruction
+   reads or writes. */
+
+static int
+nops_for_vr4130 (const struct mips_cl_insn *history,
+                 const struct mips_cl_insn *insn)
+{
+  int pos;
+
+  if (insn != NULL)
+    {
+      /* An instruction that writes neither HI nor LO cannot trigger
+         the errata. */
+      if ((insn->insn_mo->pinfo & (INSN_WRITE_HI | INSN_WRITE_LO)) == 0)
+        return 0;
+
+      /* MTLO and MTHI are exempt from the errata. */
+      if (strcmp (insn->insn_mo->name, "mtlo") == 0
+          || strcmp (insn->insn_mo->name, "mthi") == 0)
+        return 0;
+    }
+
+  /* Look for the nearest MFLO or MFHI whose noreorder_p flag is clear.
+     Only that one is examined; the function returns as soon as it has
+     been dealt with. */
+  for (pos = 0; pos < MAX_VR4130_NOPS; pos++)
+    if (!history[pos].noreorder_p
+        && MF_HILO_INSN (history[pos].insn_mo->pinfo))
+      {
+        int dest, k;
+
+        /* The register that the MFLO or MFHI writes. */
+        dest = (mips_opts.mips16
+                ? mips16_to_32_reg_map[MIPS16_EXTRACT_OPERAND (RX, history[pos])]
+                : EXTRACT_OPERAND (RD, history[pos]));
+
+        /* No nops are needed once that register has been read, either
+           by INSN itself... */
+        if (insn != NULL && insn_uses_reg (insn, dest, MIPS_GR_REG))
+          return 0;
+
+        /* ...or by one of the instructions issued since the MFLO
+           or MFHI. */
+        for (k = 0; k < pos; k++)
+          if (insn_uses_reg (history + k, dest, MIPS_GR_REG))
+            return 0;
+
+        /* POS instructions already follow the MFLO or MFHI; pad the
+           gap up to MAX_VR4130_NOPS. */
+        return MAX_VR4130_NOPS - pos;
+      }
+
+  return 0;
+}
+
/* Return the number of nops that would be needed if instruction INSN
immediately followed the MAX_NOPS instructions given by HISTORY,
where HISTORY[0] is the most recent instruction. If INSN is null,
int i, nops, tmp_nops;
nops = 0;
- for (i = 0; i < MAX_NOPS; i++)
+ for (i = 0; i < MAX_DELAY_NOPS; i++)
if (!history[i].noreorder_p)
{
tmp_nops = insns_between (history + i, insn) - i;
if (tmp_nops > nops)
nops = tmp_nops;
}
+
+ if (mips_fix_vr4130)
+ {
+ tmp_nops = nops_for_vr4130 (history, insn);
+ if (tmp_nops > nops)
+ nops = tmp_nops;
+ }
+
return nops;
}
#define OPTION_NO_FIX_VR4120 (OPTION_FIX_BASE + 3)
{"mfix-vr4120", no_argument, NULL, OPTION_FIX_VR4120},
{"mno-fix-vr4120", no_argument, NULL, OPTION_NO_FIX_VR4120},
+#define OPTION_FIX_VR4130 (OPTION_FIX_BASE + 4)
+#define OPTION_NO_FIX_VR4130 (OPTION_FIX_BASE + 5)
+ {"mfix-vr4130", no_argument, NULL, OPTION_FIX_VR4130},
+ {"mno-fix-vr4130", no_argument, NULL, OPTION_NO_FIX_VR4130},
/* Miscellaneous options. */
-#define OPTION_MISC_BASE (OPTION_FIX_BASE + 4)
+#define OPTION_MISC_BASE (OPTION_FIX_BASE + 6)
#define OPTION_TRAP (OPTION_MISC_BASE + 0)
{"trap", no_argument, NULL, OPTION_TRAP},
{"no-break", no_argument, NULL, OPTION_TRAP},
mips_fix_vr4120 = 0;
break;
+ case OPTION_FIX_VR4130:
+ mips_fix_vr4130 = 1;
+ break;
+
+ case OPTION_NO_FIX_VR4130:
+ mips_fix_vr4130 = 0;
+ break;
+
case OPTION_RELAX_BRANCH:
mips_relax_branch = 1;
break;
-no-mips16 do not generate mips16 instructions\n"));
fprintf (stream, _("\
-mfix-vr4120 work around certain VR4120 errata\n\
+-mfix-vr4130 work around VR4130 mflo/mfhi errata\n\
-mgp32 use 32-bit GPRs, regardless of the chosen ISA\n\
-mfp32 use 32-bit FPRs, regardless of the chosen ISA\n\
-mno-shared optimize output for executables\n\
--- /dev/null
+ .macro check2 insn # INSN: mnemonic of a two-operand instruction to test.
+ mflo $2 # Move LO into $2; nothing below reads $2...
+ \insn $3,$3 # ...so INSN directly follows an mflo whose result is unread.
+ .endm
+
+ .macro check3 insn # INSN: mnemonic of a three-operand instruction to test.
+ mfhi $2 # Move HI into $2; nothing below reads $2...
+ \insn $0,$3,$3 # ...so INSN (first operand $0) directly follows an unread mfhi.
+ .endm
+
+ .macro main func # Emit test parts A to I as a function named FUNC.
+
+ .ent \func
+ .type \func,@function
+\func:
+
+ # PART A
+ #
+ # Check that mfhis and mflos in .set noreorder blocks are not
+ # considered.
+
+ .set noreorder
+ mfhi $2
+ .set reorder
+ mult $3,$3
+
+ .set noreorder
+ mflo $2
+ .set reorder
+ mult $3,$3
+
+ # PART B
+ #
+ # Check for simple instances.
+
+ mfhi $2
+ mult $3,$3 # 4 nops
+
+ mfhi $2
+ addiu $3,1
+ mult $4,$4 # 3 nops
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ mult $5,$5 # 2 nops
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ mult $6,$6 # 1 nop
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ addiu $6,1
+ mult $7,$7 # 0 nops
+
+ # PART C
+ #
+ # Check that no nops are inserted after the result ($2) has been read.
+
+ mfhi $2
+ addiu $2,1 # reads $2
+ addiu $3,1
+ addiu $4,1
+ mult $5,$5
+
+ mfhi $2
+ addiu $3,1
+ addiu $2,1 # reads $2
+ addiu $4,1
+ mult $5,$5
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ addiu $2,1 # reads $2
+ mult $5,$5
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ mult $2,$2 # reads $2
+
+ # PART D
+ #
+ # Check that we still insert the usual interlocking nops in cases
+ # where the VR4130 errata doesn't apply.
+
+ mfhi $2
+ mult $2,$2 # 2 nops
+
+ mfhi $2
+ addiu $2,1
+ mult $3,$3 # 1 nop
+
+ mfhi $2
+ addiu $3,1
+ mult $2,$2 # 1 nop
+
+ # PART E
+ #
+ # Check for branches whose targets might be affected.
+
+ mfhi $2
+ bnez $3,1f # 2 nops for normal mode, 3 for mips16
+
+ mfhi $2
+ addiu $3,1
+ bnez $3,1f # 1 nop for normal mode, 2 for mips16
+
+ mfhi $2
+ addiu $3,1
+ addiu $3,1
+ bnez $3,1f # 0 nops for normal mode, 1 for mips16
+
+ mfhi $2
+ addiu $3,1
+ addiu $3,1
+ addiu $3,1
+ bnez $3,1f # 0 nops
+
+ # PART F
+ #
+ # As above, but with no dependencies between the branch and
+ # the previous instruction. The final branch can use the
+ # preceding addiu as its delay slot.
+
+ mfhi $2
+ addiu $3,1
+ bnez $4,1f # 1 nop for normal mode, 2 for mips16
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ bnez $5,1f # 0 nops for normal mode, 1 for mips16
+
+ mfhi $2
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ bnez $6,1f # 0 nops, fill delay slot in normal mode
+1: # Target of every branch in parts E and F.
+
+ # PART G
+ #
+ # Like part B, but check that intervening .set noreorders don't
+ # affect the number of nops.
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ .set reorder
+ mult $4,$4 # 3 nops
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ .set reorder
+ addiu $4,1
+ mult $5,$5 # 2 nops
+
+ mfhi $2
+ addiu $3,1
+ .set noreorder
+ addiu $4,1
+ .set reorder
+ mult $5,$5 # 2 nops
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ addiu $4,1
+ .set reorder
+ mult $5,$5 # 2 nops
+
+ mfhi $2
+ addiu $3,1
+ .set noreorder
+ addiu $4,1
+ .set reorder
+ addiu $5,1
+ mult $6,$6 # 1 nop
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ .set reorder
+ mult $6,$6 # 1 nop
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ addiu $6,1
+ .set reorder
+ mult $7,$7 # 0 nops
+
+ # PART H
+ #
+ # Like part B, but the mult occurs in a .set noreorder block.
+
+ mfhi $2
+ .set noreorder
+ mult $3,$3 # 4 nops
+ .set reorder
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ mult $4,$4 # 3 nops
+ .set reorder
+
+ mfhi $2
+ addiu $3,1
+ .set noreorder
+ addiu $4,1
+ mult $5,$5 # 2 nops
+ .set reorder
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ mult $6,$6 # 1 nop
+ .set reorder
+
+ mfhi $2
+ .set noreorder
+ addiu $3,1
+ addiu $4,1
+ addiu $5,1
+ addiu $6,1
+ mult $7,$7 # 0 nops
+ .set reorder
+
+ # PART I
+ #
+ # Check every affected multiplication and division instruction.
+
+ check2 mult
+ check2 multu
+ check2 dmult
+ check2 dmultu
+
+ check3 div
+ check3 divu
+ check3 ddiv
+ check3 ddivu
+
+ .end \func
+ .endm
+
+ .set nomips16
+ main foo # Parts A to I as function foo, with mips16 mode off.
+
+ # PART J
+ #
+ # Check every affected multiply-accumulate instruction.
+ # (Still under .set nomips16, so not repeated in mips16 mode.)
+
+ check3 macc
+ check3 macchi
+ check3 macchis
+ check3 macchiu
+ check3 macchius
+ check3 maccs
+ check3 maccu
+ check3 maccus
+
+ check3 dmacc
+ check3 dmacchi
+ check3 dmacchis
+ check3 dmacchiu
+ check3 dmacchius
+ check3 dmaccs
+ check3 dmaccu
+ check3 dmaccus
+
+ # PART K
+ #
+ # Check that mtlo and mthi are exempt from the VR4130 errata,
+ # although the usual interlocking delay applies.
+ # (Like part J, assembled under .set nomips16 only.)
+
+ mflo $2
+ mtlo $3
+
+ mflo $2
+ mthi $3
+
+ mfhi $2
+ mtlo $3
+
+ mfhi $2
+ mthi $3
+
+ .set mips16
+ main bar # Parts A to I again as function bar, with mips16 mode on.