from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
Icache = l0i_cache, Dcache = l0d_cache,
- send_evictions = (options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
prefetcher = prefetcher,
ruby_system = ruby_system,
clk_domain=system.cpu[i].clk_domain,
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the cache latency is only used by the sequencer on fast path hits
#
l1_cntrl = L1Cache_Controller(version = i,
cacheMemory = cache,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
not options.disable_dyn_timeouts,
no_mig_atomic = not \
options.allow_atomic_migration,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
from m5.objects import *
from m5.defines import buildEnv
from Ruby import create_topology
+from Ruby import send_evicts
#
# Note: the L1 Cache latency is only used by the sequencer on fast path hits
L2cache = l2_cache,
no_mig_atomic = not \
options.allow_atomic_migration,
- send_evictions = (
- options.cpu_type == "detailed"),
+ send_evictions = send_evicts(options),
transitions_per_cycle = options.ports,
clk_domain=system.cpu[i].clk_domain,
ruby_system = ruby_system)
ruby.num_of_sequencers = len(cpu_sequencers)
ruby.random_seed = options.random_seed
+def send_evicts(options):
+ # currently, 2 scenarios warrant forwarding evictions to the CPU:
+ # 1. The O3 model must keep the LSQ coherent with the caches
+ # 2. The x86 mwait instruction is built on top of coherence invalidations
+ if options.cpu_type == "detailed" or buildEnv['TARGET_ISA'] == 'x86':
+ return True
+ return False
+
# Create a backing copy of physical memory in case required
if options.access_backing_store:
ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),
}
0x1: decode MODRM_MOD {
0x3: decode MODRM_RM {
- 0x0: monitor();
- 0x1: mwait();
+ 0x0: MonitorInst::monitor({{
+ xc->armMonitor(Rax);
+ }});
+ 0x1: MwaitInst::mwait({{
+ uint64_t m = 0; //mem
+ unsigned s = 0x8; //size
+ unsigned f = 0; //flags
+ readMemAtomic(xc, traceData,
+ xc->getAddrMonitor()->vAddr,
+ m, s, f);
+ xc->mwaitAtomic(xc->tcBase());
+ MicroHalt hltObj(machInst, mnemonic, 0x0);
+ hltObj.execute(xc, traceData);
+ }});
default: Inst::UD2();
}
default: sidt_Ms();
//Include a format to generate a CPUID instruction.
##include "cpuid.isa"
+//Include a format to generate the monitor/mwait instructions.
+##include "monitor_mwait.isa"
+
//Include the "unknown" format
##include "unknown.isa"
--- /dev/null
+// Copyright (c) AMD
+// All rights reserved.
+//
+// Authors: Marc Orr
+
+// Monitor Instruction
+
+output header {{
+ class MonitorInst : public X86ISA::X86StaticInst
+ {
+ public:
+ static const RegIndex foldOBit = 0;
+ /// Constructor
+ MonitorInst(const char *_mnemonic, ExtMachInst _machInst,
+ OpClass __opClass) :
+ X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+ { }
+
+ std::string generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ std::string MonitorInst::generateDisassembly(Addr PC,
+ const SymbolTable *symtab) const
+ {
+ std::stringstream response;
+
+ printMnemonic(response, mnemonic);
+ ccprintf(response, " ");
+ printReg(response, _srcRegIdx[0], machInst.opSize);
+ return response.str();
+ }
+}};
+
+// Format for the MONITOR instruction: builds a MonitorInst-derived
+// StaticInst from the supplied code snippet using the basic templates.
+def format MonitorInst(code, *opt_flags) {{
+    iop = InstObjParams(name, Name, 'MonitorInst', code, opt_flags)
+    header_output = BasicDeclare.subst(iop)
+    decoder_output = BasicConstructor.subst(iop)
+    decode_block = BasicDecode.subst(iop)
+    exec_output = BasicExecute.subst(iop)
+}};
+
+
+// Mwait instruction
+
+// Declarations for execute() methods.
+def template MwaitExecDeclare {{
+ Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+ Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+ Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
+ Trace::InstRecord *) const;
+}};
+
+def template MwaitDeclare {{
+ class %(class_name)s : public %(base_class)s
+ {
+ public:
+ // Constructor.
+ %(class_name)s(ExtMachInst machInst);
+ %(MwaitExecDeclare)s
+ };
+}};
+
+// Timing-mode initiateAcc: issue a load of the line armed by MONITOR
+// (xc->getAddrMonitor()->vAddr) so that its translated physical address
+// comes back with the reply packet; the loaded value itself is unused.
+def template MwaitInitiateAcc {{
+    Fault %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc,
+        Trace::InstRecord * traceData) const
+    {
+        uint64_t m = 0; //mem (load destination; value unused)
+        unsigned s = 0x8; //size
+        unsigned f = 0; //flags
+        readMemTiming(xc, traceData, xc->getAddrMonitor()->vAddr, m, s, f);
+        return NoFault;
+    }
+}};
+
+// Timing-mode completeAcc: the reply packet carries the monitored
+// line's physical address.  xc->mwait(pkt) records it and returns true
+// when the core should sleep, in which case a MicroHalt is executed.
+def template MwaitCompleteAcc {{
+    Fault %(class_name)s::completeAcc(PacketPtr pkt, CPU_EXEC_CONTEXT *xc,
+        Trace::InstRecord *traceData) const
+    {
+        MicroHalt hltObj(machInst, mnemonic, 0x0);
+        if(xc->mwait(pkt)) {
+            hltObj.execute(xc, traceData);
+        }
+        return NoFault;
+    }
+}};
+
+output header {{
+ class MwaitInst : public X86ISA::X86StaticInst
+ {
+ public:
+ static const RegIndex foldOBit = 0;
+ /// Constructor
+ MwaitInst(const char *_mnemonic, ExtMachInst _machInst,
+ OpClass __opClass) :
+ X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+ {
+ flags[IsMemRef] = 1;
+ flags[IsLoad] = 1;
+ }
+
+ std::string generateDisassembly(Addr pc,
+ const SymbolTable *symtab) const;
+ };
+}};
+
+output decoder {{
+ std::string MwaitInst::generateDisassembly(Addr PC,
+ const SymbolTable *symtab) const
+ {
+ std::stringstream response;
+
+ printMnemonic(response, mnemonic);
+ ccprintf(response, " ");
+ printReg(response, _srcRegIdx[0], machInst.opSize);
+ return response.str();
+ }
+}};
+
+// Format for the MWAIT instruction: like the basic formats, but adds
+// the split-access templates (initiateAcc/completeAcc) needed for the
+// timing-mode load of the monitored line.
+def format MwaitInst(code, *opt_flags) {{
+    iop = InstObjParams(name, Name, 'MwaitInst', code, opt_flags)
+    header_output = MwaitDeclare.subst(iop)
+    decoder_output = BasicConstructor.subst(iop)
+    decode_block = BasicDecode.subst(iop)
+    exec_output = BasicExecute.subst(iop)
+    exec_output += MwaitInitiateAcc.subst(iop)
+    exec_output += MwaitCompleteAcc.subst(iop)
+}};
+
DebugFlag('O3PipeView')
DebugFlag('PCEvent')
DebugFlag('Quiesce')
+DebugFlag('Mwait')
CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr',
'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta',
#include "base/misc.hh"
#include "base/output.hh"
#include "base/trace.hh"
-#include "cpu/base.hh"
#include "cpu/checker/cpu.hh"
+#include "cpu/base.hh"
#include "cpu/cpuevent.hh"
#include "cpu/profile.hh"
#include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
#include "debug/SyscallVerbose.hh"
+#include "mem/page_table.hh"
#include "params/BaseCPU.hh"
#include "sim/full_system.hh"
#include "sim/process.hh"
_taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
_switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
interrupts(p->interrupts), profileEvent(NULL),
- numThreads(p->numThreads), system(p->system)
+ numThreads(p->numThreads), system(p->system),
+ addressMonitor()
{
// if Python did not provide a valid ID, do it here
if (_cpuId == -1 ) {
delete[] comInstEventQueue;
}
+// Arm the address monitor (x86 MONITOR): remember the virtual address
+// to watch and clear any stale physical address.  The paddr is filled
+// in later by mwait()/mwaitAtomic().
+void
+BaseCPU::armMonitor(Addr address)
+{
+    addressMonitor.armed = true;
+    addressMonitor.vAddr = address;
+    addressMonitor.pAddr = 0x0;
+    DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+}
+
+// Complete an MWAIT in timing mode: record the monitored line's
+// physical address (taken from the just-completed load's packet) and
+// enter the waiting state.  Returns true when the caller should halt
+// the core; returns false -- consuming the pending wakeup -- when a
+// wakeup already arrived between MONITOR and MWAIT.
+bool
+BaseCPU::mwait(PacketPtr pkt)
+{
+    if(addressMonitor.gotWakeup == false) {
+        // mask the paddr down to its cache-line base address
+        int block_size = cacheLineSize();
+        uint64_t mask = ~((uint64_t)(block_size - 1));
+
+        assert(pkt->req->hasPaddr());
+        addressMonitor.pAddr = pkt->getAddr() & mask;
+        addressMonitor.waiting = true;
+
+        DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            addressMonitor.vAddr, addressMonitor.pAddr);
+        return true;
+    } else {
+        // a wakeup raced in; do not sleep, and clear the wakeup flag
+        addressMonitor.gotWakeup = false;
+        return false;
+    }
+}
+
+// Complete an MWAIT in atomic mode: no timing load supplies the paddr,
+// so translate the monitored virtual address here and record the
+// line-aligned physical address, then enter the waiting state.
+void
+BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+{
+    Request req;
+    Addr addr = addressMonitor.vAddr;
+    int block_size = cacheLineSize();
+    uint64_t mask = ~((uint64_t)(block_size - 1));
+    int size = block_size;
+
+    //The address of the next line if it crosses a cache line boundary.
+    Addr secondAddr = roundDown(addr + size - 1, block_size);
+
+    // shrink the access so it does not spill into the next cache line
+    if (secondAddr > addr)
+        size = secondAddr - addr;
+
+    req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
+
+    // translate to physical address
+    Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
+    assert(fault == NoFault);
+
+    addressMonitor.pAddr = req.getPaddr() & mask;
+    addressMonitor.waiting = true;
+
+    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+        addressMonitor.vAddr, addressMonitor.pAddr);
+}
+
void
BaseCPU::init()
{
comInstEventQueue[tid]->schedule(event, now + insts);
}
+// Construct an idle monitor.  Zero the address/value fields as well so
+// that no member is ever read uninitialized.
+AddressMonitor::AddressMonitor()
+    : armed(false), vAddr(0), pAddr(0), val(0), waiting(false),
+      gotWakeup(false)
+{
+}
+
+// Called from the CPU data ports on snooped packets.  If the monitor is
+// armed and the core is mwaiting on this packet's line, clear 'waiting'
+// and tell the caller to wake the core.
+// NOTE(review): pAddr is stored line-aligned, but it is compared to
+// pkt->getAddr() without masking -- this assumes snooped packets carry
+// line-aligned addresses; confirm.
+bool AddressMonitor::doMonitor(PacketPtr pkt) {
+    assert(pkt->req->hasPaddr());
+    if(armed && waiting) {
+        if(pAddr == pkt->getAddr()) {
+            DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
+                pkt->getAddr());
+            waiting = false;
+            return true;
+        }
+    }
+    return false;
+}
+
void
BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
{
#include "sim/insttracer.hh"
#include "sim/probe/pmu.hh"
#include "sim/system.hh"
+#include "debug/Mwait.hh"
+class BaseCPU;
struct BaseCPUParams;
class CheckerCPU;
class ThreadContext;
+// Per-CPU state backing the x86 monitor/mwait pair.  armMonitor() arms
+// it with a virtual address; mwait()/mwaitAtomic() record the
+// line-aligned physical address and set 'waiting'; doMonitor() clears
+// 'waiting' and reports a wakeup when a snoop hits that line.
+struct AddressMonitor
+{
+    AddressMonitor();
+    bool doMonitor(PacketPtr pkt);
+
+    bool armed;       // armed via armMonitor()
+    Addr vAddr;       // monitored virtual address
+    Addr pAddr;       // monitored line-aligned physical address
+    uint64_t val;     // NOTE(review): not used in the visible code -- confirm
+    bool waiting; // 0=normal, 1=mwaiting
+    bool gotWakeup;   // a wakeup arrived before/while mwaiting
+};
+
class CPUProgressEvent : public Event
{
protected:
Stats::Scalar numCycles;
Stats::Scalar numWorkItemsStarted;
Stats::Scalar numWorkItemsCompleted;
+
+ private:
+ AddressMonitor addressMonitor;
+
+ public:
+ void armMonitor(Addr address);
+ bool mwait(PacketPtr pkt);
+ void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
+ AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
+ void atomicNotify(Addr address);
};
#endif // THE_ISA == NULL_ISA
/** Sets the number of consecutive store conditional failures. */
void setStCondFailures(unsigned int sc_failures)
{ thread->storeCondFailures = sc_failures; }
+
+ public:
+    // monitor/mwait functions
+ void armMonitor(Addr address) { cpu->armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return cpu->mwaitAtomic(tc, cpu->dtb); }
+ AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
};
template<class Impl>
this->dtb->demapPage(vaddr, asn);
}
+    // monitor/mwait funtions
+ virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+
void demapInstPage(Addr vaddr, uint64_t asn)
{
this->itb->demapPage(vaddr, asn);
#include "arch/registers.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
+#include "cpu/base.hh"
#include "cpu/static_inst_fwd.hh"
#include "cpu/translation.hh"
* Invalidate a page in the DTLB <i>and</i> ITLB.
*/
virtual void demapPage(Addr vaddr, uint64_t asn) = 0;
+ virtual void armMonitor(Addr address) = 0;
+ virtual bool mwait(PacketPtr pkt) = 0;
+ virtual void mwaitAtomic(ThreadContext *tc) = 0;
+ virtual AddressMonitor *getAddrMonitor() = 0;
/** @} */
outstring = s.str();
}
+
+// monitor/mwait support: InOrderDynInst keeps no monitor state of its
+// own -- every call is forwarded to the owning CPU.
+void
+InOrderDynInst::armMonitor(Addr address) {
+    cpu->armMonitor(address);
+}
+
+bool
+InOrderDynInst::mwait(PacketPtr pkt) {
+    return cpu->mwait(pkt);
+}
+
+void
+InOrderDynInst::mwaitAtomic(ThreadContext *tc)
+{
+    return cpu->mwaitAtomic(tc, cpu->getDTBPtr());
+}
+
+AddressMonitor *
+InOrderDynInst::getAddrMonitor()
+{
+    return cpu->getCpuAddrMonitor();
+}
void demapPage(Addr vaddr, uint64_t asn) {
panic("demapPage unimplemented");
}
+
+ public:
+    // monitor/mwait functions
+ void armMonitor(Addr address);
+ bool mwait(PacketPtr pkt);
+ void mwaitAtomic(ThreadContext *tc);
+ AddressMonitor *getAddrMonitor();
};
- TheISA::Misc_Reg_Base, val);
}
}
+
+ public:
+    // monitor/mwait functions
+ void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
+ AddressMonitor *getAddrMonitor()
+ { return getCpuPtr()->getCpuAddrMonitor(); }
};
}
void
FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
+    // monitor/mwait (x86): wake the core if this snoop targets the
+    // monitored line, then let the LSQ process the snoop as usual.
+    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
    lsq->recvTimingSnoopReq(pkt);
}
/** Pointer to LSQ. */
LSQ<Impl> *lsq;
+ FullO3CPU<Impl> *cpu;
public:
/** Default constructor. */
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
- : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq)
+ : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq),
+ cpu(_cpu)
{ }
protected:
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
pkt->cmdString());
+ // X86 ISA: Snooping an invalidation for monitor/mwait
+ AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+ if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+ cpu->wakeup();
+ }
+
// if snoop invalidates, release any associated locks
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
void
BaseSimpleCPU::wakeup()
{
+ getAddrMonitor()->gotWakeup = true;
+
if (thread->status() != ThreadContext::Suspended)
return;
private:
TheISA::PCState pred_pc;
+
+ public:
+    // monitor/mwait functions
+ void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+ bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+ void mwaitAtomic(ThreadContext *tc)
+ { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
+ AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
};
#endif // __CPU_SIMPLE_BASE_HH__
#include "sim/full_system.hh"
#include "sim/system.hh"
+#include "debug/Mwait.hh"
+
using namespace std;
using namespace TheISA;
void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
+    // monitor/mwait (x86): wake the core if this snoop targets the
+    // monitored line, before the locked-access bookkeeping.
+    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
    TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
}
+// Functional snoops must also be able to wake an mwaiting core, so the
+// monitor check is replicated here; this port does no other functional
+// snoop handling.
+void
+TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
+{
+    // monitor/mwait (x86): wake the core if this snoop targets the
+    // monitored line.
+    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
+}
bool
TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
* a wakeup event on a cpu that is monitoring an address
*/
virtual void recvTimingSnoopReq(PacketPtr pkt);
+ virtual void recvFunctionalSnoop(PacketPtr pkt);
virtual bool recvTimingResp(PacketPtr pkt);
virtual void recvRetry();
+ virtual bool isSnooping() const {
+ return true;
+ }
+
struct DTickEvent : public TickEvent
{
DTickEvent(TimingSimpleCPU *_cpu)