Original Creator: Adria Armejach.
Branch instructions needed to be annotated in x86 as direct/indirect and conditional/unconditional. These annotations were not present, causing the branch predictor to misbehave and not use the BTB. In addition, logic to determine the real branch target at decode needed to be added, as it was also missing.
Change-Id: I91e707452c1825b9bb4ae75c3f599da489ae5b9a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29154
Reviewed-by: Alexandru Duțu <alexandru.dutu@amd.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
# Make the default data size of calls 64 bits in 64 bit mode
.adjust_env oszIn64Override
.function_call
+ .control_direct
limm t1, imm
rdip t7
# Make the default data size of calls 64 bits in 64 bit mode
.adjust_env oszIn64Override
.function_call
+ .control_indirect
rdip t1
# Check target of call
# Make the default data size of calls 64 bits in 64 bit mode
.adjust_env oszIn64Override
.function_call
+ .control_indirect
rdip t7
ld t1, seg, sib, disp
# Make the default data size of calls 64 bits in 64 bit mode
.adjust_env oszIn64Override
.function_call
+ .control_indirect
rdip t7
ld t1, seg, riprel, disp
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
def macroop JRCX_I
{
+ .control_direct
+
rdip t1
add t0, t0, rcx, flags=(EZF,), dataSize=asz
wripi t1, imm, flags=(CEZF,)
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_direct
rdip t1
limm t2, imm
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_indirect
wripi reg, 0
};
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_indirect
ld t1, seg, sib, disp
wripi t1, 0
{
# Make the default data size of jumps 64 bits in 64 bit mode
.adjust_env oszIn64Override
+ .control_indirect
rdip t7
ld t1, seg, riprel, disp
def macroop JMP_FAR_M
{
+ .control_indirect
+
limm t1, 0, dataSize=8
limm t2, 0, dataSize=8
lea t1, seg, sib, disp, dataSize=asz
def macroop JMP_FAR_P
{
+ .control_indirect
+
limm t1, 0, dataSize=8
limm t2, 0, dataSize=8
rdip t7, dataSize=asz
def macroop JMP_FAR_I
{
+ .control_indirect
+
# Put the whole far pointer into a register.
limm t2, imm, dataSize=8
# Figure out the width of the offset.
def macroop JMP_FAR_REAL_M
{
+ .control_indirect
+
lea t1, seg, sib, disp, dataSize=asz
ld t2, seg, [1, t0, t1], dsz
ld t1, seg, [1, t0, t1]
def macroop JMP_FAR_REAL_P
{
+ .control_indirect
panic "Real mode far jump executed in 64 bit mode!"
};
def macroop JMP_FAR_REAL_I
{
+ .control_indirect
+
# Put the whole far pointer into a register.
limm t2, imm, dataSize=8
# Figure out the width of the offset.
microcode = '''
def macroop LOOP_I {
+ .control_direct
+
# Make the default data size of pops 64 bits in 64 bit mode
.adjust_env oszIn64Override
rdip t1
};
def macroop LOOPNE_I {
+ .control_direct
+
# Make the default data size of pops 64 bits in 64 bit mode
.adjust_env oszIn64Override
rdip t1
};
def macroop LOOPE_I {
+ .control_direct
+
# Make the default data size of pops 64 bits in 64 bit mode
.adjust_env oszIn64Override
rdip t1
# Make the default data size of rets 64 bits in 64 bit mode
.adjust_env oszIn64Override
.function_return
+ .control_indirect
ld t1, ss, [1, t0, rsp]
# Check address of return
# Make the default data size of rets 64 bits in 64 bit mode
.adjust_env oszIn64Override
.function_return
+ .control_indirect
limm t2, imm
ld t1, ss, [1, t0, rsp]
def macroop RET_FAR {
.adjust_env oszIn64Override
.function_return
+ .control_indirect
# Get the return RIP
ld t1, ss, [1, t0, rsp]
self.function_call = True
def function_return(self):
self.function_return = True
+ def control_direct(self):
+ self.control_direct = True
+ def control_indirect(self):
+ self.control_indirect = True
def __init__(self, name):
super(X86Macroop, self).__init__(name)
"serialize_before" : self.serializeBefore,
"serialize_after" : self.serializeAfter,
"function_call" : self.function_call,
- "function_return" : self.function_return
+ "function_return" : self.function_return,
+ "control_direct" : self.control_direct,
+ "control_indirect" : self.control_indirect
}
self.declared = False
self.adjust_env = ""
self.serialize_after = False
self.function_call = False
self.function_return = False
+ self.control_direct = False
+ self.control_indirect = False
def getAllocator(self, env):
return "new X86Macroop::%s(machInst, %s)" % \
if self.function_return:
flags.append("IsReturn")
flags.append("IsUncondControl")
+ if self.control_direct:
+ flags.append("IsDirectControl")
+ if self.control_indirect:
+ flags.append("IsIndirectControl")
else:
flags.append("IsDelayedCommit")
uint8_t _dataSize, uint16_t _ext);
Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+ X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
+ override;
+
+ /// Explicitly import the otherwise hidden branchTarget
+ using StaticInst::branchTarget;
};
}};
uint8_t _dataSize, uint16_t _ext);
Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+ X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
+ override;
+
+ /// Explicitly import the otherwise hidden branchTarget
+ using StaticInst::branchTarget;
};
}};
%(constructor)s;
%(cond_control_flag_init)s;
}
+
+ X86ISA::PCState
+ %(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
+ {
+ X86ISA::PCState pcs = branchPC;
+ DPRINTF(X86, "branchTarget PC info: %s, Immediate: %lx\n",
+ pcs, (int64_t) this->machInst.immediate);
+ pcs.npc(pcs.npc() + (int64_t) this->machInst.immediate);
+ pcs.uEnd();
+ return pcs;
+ }
}};
def template MicroRegOpImmConstructor {{
%(constructor)s;
%(cond_control_flag_init)s;
}
+
+ X86ISA::PCState
+ %(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
+ {
+ X86ISA::PCState pcs = branchPC;
+ DPRINTF(X86, "branchTarget PC info: %s, Immediate (imm8): %lx\n",
+ pcs, (int8_t)imm8);
+ pcs.npc(pcs.npc() + (int8_t)imm8);
+ pcs.uEnd();
+ return pcs;
+ }
}};
output header {{
# a version without it and fix up this version to use it.
if flag_code != "" or cond_check != "true":
self.buildCppClasses(name, Name, suffix,
- code, big_code, "", "true", else_code, "", op_class)
+ code, big_code, "", "true", else_code,
+ "flags[IsUncondControl] = flags[IsControl];", op_class)
suffix = "Flags" + suffix
# If psrc1 or psrc2 is used, we need to actually insert code to
uint64_t setFlags, uint16_t _target, uint8_t _cc);
Fault execute(ExecContext *, Trace::InstRecord *) const;
+
+ X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
+ override;
+
+ /// Explicitly import the otherwise hidden branchTarget
+ using StaticInst::branchTarget;
};
}};
%(constructor)s;
%(cond_control_flag_init)s;
}
+
+ X86ISA::PCState
+ %(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
+ {
+ X86ISA::PCState pcs = branchPC;
+ DPRINTF(X86, "Br branchTarget PC info: %s, Target: %d\n",
+ pcs, (int16_t)target);
+ pcs.nupc(target);
+ pcs.uAdvance();
+ return pcs;
+ }
}};
output decoder {{
"else_code": "nuIP = nuIP;",
"cond_test": "checkCondition(ccFlagBits | cfofBits | dfBit | \
ecfBit | ezfBit, cc)",
- "cond_control_flag_init": "flags[IsCondControl] = true"})
+ "cond_control_flag_init": "flags[IsCondControl] = true; \
+ flags[IsDirectControl] = true;"})
exec_output += SeqOpExecute.subst(iop)
header_output += SeqOpDeclare.subst(iop)
decoder_output += SeqOpConstructor.subst(iop)
{"code": "", "else_code": "",
"cond_test": "checkCondition(ccFlagBits | cfofBits | dfBit | \
ecfBit | ezfBit, cc)",
- "cond_control_flag_init": ""})
+ "cond_control_flag_init": "flags[IsUncondControl] = true;\
+ flags[IsDirectControl] = true;"})
exec_output += SeqOpExecute.subst(iop)
header_output += SeqOpDeclare.subst(iop)
decoder_output += SeqOpConstructor.subst(iop)
DPRINTF(Decode,
"[tid:%i] [sn:%llu] "
- "Updating predictions: PredPC: %s\n",
- tid, inst->seqNum, target);
+ "Updating predictions: Wrong predicted target: %s \
+ PredPC: %s\n",
+ tid, inst->seqNum, inst->readPredTarg(), target);
//The micro pc after an instruction level branch should be 0
inst->setPredTarg(target);
break;