x86 isa: This patch attempts an implementation of mwait.
authorMarc Orr <morr@cs.wisc.edu>
Thu, 6 Nov 2014 11:42:22 +0000 (05:42 -0600)
committerMarc Orr <morr@cs.wisc.edu>
Thu, 6 Nov 2014 11:42:22 +0000 (05:42 -0600)
Mwait works as follows:
1. A cpu monitors an address of interest (monitor instruction)
2. A cpu calls mwait - this loads the cache line into that cpu's cache.
3. The cpu goes to sleep.
4. When another processor requests write permission for the line, it is
   evicted from the sleeping cpu's cache. This eviction is forwarded to the
   sleeping cpu, which then wakes up.

Committed by: Nilay Vaish <nilay@cs.wisc.edu>

26 files changed:
configs/ruby/MESI_Three_Level.py
configs/ruby/MESI_Two_Level.py
configs/ruby/MI_example.py
configs/ruby/MOESI_CMP_directory.py
configs/ruby/MOESI_CMP_token.py
configs/ruby/MOESI_hammer.py
configs/ruby/Ruby.py
src/arch/x86/isa/decoder/two_byte_opcodes.isa
src/arch/x86/isa/formats/formats.isa
src/arch/x86/isa/formats/monitor_mwait.isa [new file with mode: 0644]
src/cpu/SConscript
src/cpu/base.cc
src/cpu/base.hh
src/cpu/base_dyn_inst.hh
src/cpu/checker/cpu.hh
src/cpu/exec_context.hh
src/cpu/inorder/inorder_dyn_inst.cc
src/cpu/inorder/inorder_dyn_inst.hh
src/cpu/minor/exec_context.hh
src/cpu/o3/cpu.cc
src/cpu/o3/cpu.hh
src/cpu/simple/atomic.cc
src/cpu/simple/base.cc
src/cpu/simple/base.hh
src/cpu/simple/timing.cc
src/cpu/simple/timing.hh

index f9ded25f19a9f4904dca513b28f4596d06771cc0..f5a2ddfbe04962a4c8ae0f5f20247d3f9a5e703d 100644 (file)
@@ -34,6 +34,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts
 
 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -101,7 +102,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
 
             l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
                           Icache = l0i_cache, Dcache = l0d_cache,
-                          send_evictions = (options.cpu_type == "detailed"),
+                          send_evictions = send_evicts(options),
                           clk_domain=system.cpu[i].clk_domain,
                           ruby_system = ruby_system)
 
index b7bdd144706d437b578170202dbd7c0415068625..d911d76ef3f9687569130f216e8c592f4f774ee4 100644 (file)
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts
 
 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -91,8 +92,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                       L1Icache = l1i_cache,
                                       L1Dcache = l1d_cache,
                                       l2_select_num_bits = l2_bits,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                       prefetcher = prefetcher,
                                       ruby_system = ruby_system,
                                       clk_domain=system.cpu[i].clk_domain,
index 2dd064b55149669395c14522fedf21be25c1667c..708e111e66aad6579a710f8a2f5bfc81d294cbea 100644 (file)
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts
 
 #
 # Note: the cache latency is only used by the sequencer on fast path hits
@@ -79,8 +80,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
         #
         l1_cntrl = L1Cache_Controller(version = i,
                                       cacheMemory = cache,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                       transitions_per_cycle = options.ports,
                                       clk_domain=system.cpu[i].clk_domain,
                                       ruby_system = ruby_system)
index 9c4bab4343b9899ea77e7ab58eceb4f5709aa757..14ba33698e026d78c43529d3732a8c045f83ec76 100644 (file)
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts
 
 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -89,8 +90,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                       L1Icache = l1i_cache,
                                       L1Dcache = l1d_cache,
                                       l2_select_num_bits = l2_bits,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                       transitions_per_cycle = options.ports,
                                       clk_domain=system.cpu[i].clk_domain,
                                       ruby_system = ruby_system)
index 26cd625b59114f82114439a71bf1a114f2afa473..42759b0920765b302f6053f9d7d281b1226e5e31 100644 (file)
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts
 
 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -109,8 +110,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                         not options.disable_dyn_timeouts,
                                       no_mig_atomic = not \
                                         options.allow_atomic_migration,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                       transitions_per_cycle = options.ports,
                                       clk_domain=system.cpu[i].clk_domain,
                                       ruby_system = ruby_system)
index 740c6783e4935cbaeae07347483ccb883b84d677..571a645a69901c49a419f529b3afff1afe39ac87 100644 (file)
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts
 
 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -102,8 +103,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                       L2cache = l2_cache,
                                       no_mig_atomic = not \
                                         options.allow_atomic_migration,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                       transitions_per_cycle = options.ports,
                                       clk_domain=system.cpu[i].clk_domain,
                                       ruby_system = ruby_system)
index b99e251d37916a6ce9a72e08065830a8f2c84a78..44d6bdfcc5c9b3e647440c254496b515cbda26b0 100644 (file)
@@ -233,6 +233,14 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []):
     ruby.num_of_sequencers = len(cpu_sequencers)
     ruby.random_seed    = options.random_seed
 
+def send_evicts(options):
+    # currently, 2 scenarios warrant forwarding evictions to the CPU:
+    # 1. The O3 model must keep the LSQ coherent with the caches
+    # 2. The x86 mwait instruction is built on top of coherence invalidations
+    if options.cpu_type == "detailed" or buildEnv['TARGET_ISA'] == 'x86':
+        return True
+    return False
+
     # Create a backing copy of physical memory in case required
     if options.access_backing_store:
         ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),
index eb395fce2cc0c226053c1e711760565be81e1c44..081bad97139880ca18ec89de07c2ed51f31741bf 100644 (file)
                     }
                     0x1: decode MODRM_MOD {
                         0x3: decode MODRM_RM {
-                            0x0: monitor();
-                            0x1: mwait();
+                            0x0: MonitorInst::monitor({{
+                               xc->armMonitor(Rax);
+                            }});
+                            0x1: MwaitInst::mwait({{
+                               uint64_t m = 0;          // read destination (unused)
+                               unsigned s = 0x8;        // size argument of the read
+                               unsigned f = 0;          // flags argument (none set)
+                               readMemAtomic(xc, traceData,
+                                             xc->getAddrMonitor()->vAddr,
+                                             m, s, f);
+                               xc->mwaitAtomic(xc->tcBase());
+                               MicroHalt hltObj(machInst, mnemonic, 0x0);
+                               hltObj.execute(xc, traceData);
+                            }});
                             default: Inst::UD2();
                         }
                         default: sidt_Ms();
index cc0eb9acf7a60ce9f9fe49dff6dfd6af67214cb4..b5ffd4d59b01f5dae39f35d6225476fd0a91a26b 100644 (file)
@@ -45,6 +45,9 @@
 //Include a format to generate a CPUID instruction.
 ##include "cpuid.isa"
 
+//Include a format to generate monitor/mwait instructions.
+##include "monitor_mwait.isa"
+
 //Include the "unknown" format
 ##include "unknown.isa"
 
diff --git a/src/arch/x86/isa/formats/monitor_mwait.isa b/src/arch/x86/isa/formats/monitor_mwait.isa
new file mode 100644 (file)
index 0000000..493b7c5
--- /dev/null
@@ -0,0 +1,131 @@
+// Copyright (c) AMD
+// All rights reserved.
+//
+// Authors: Marc Orr
+
+// Monitor Instruction
+
+output header {{
+    class MonitorInst : public X86ISA::X86StaticInst
+    {
+      public:
+        static const RegIndex foldOBit = 0;
+        /// Constructor; passes every argument on to X86StaticInst.
+        MonitorInst(const char *_mnemonic, ExtMachInst _machInst,
+                OpClass __opClass) :
+            X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+        { }
+
+        std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const;
+    };
+}};
+
+output decoder {{
+    // Emits the mnemonic, a space, then source register 0 printed at the
+    // instruction's operand size.
+    std::string MonitorInst::generateDisassembly(Addr PC,
+            const SymbolTable *symtab) const
+    {
+        std::stringstream out;
+
+        printMnemonic(out, mnemonic);
+        ccprintf(out, " ");
+        printReg(out, _srcRegIdx[0], machInst.opSize);
+
+        return out.str();
+    }
+}};
+
+def format MonitorInst(code, *opt_flags) {{
+    iop = InstObjParams(name, Name, 'MonitorInst', code, opt_flags)
+    header_output = BasicDeclare.subst(iop)
+    decoder_output = BasicConstructor.subst(iop)
+    decode_block = BasicDecode.subst(iop)
+    exec_output = BasicExecute.subst(iop)  # presumably execute() only - confirm
+}};
+
+
+// Mwait instruction
+
+// Declarations for the execute(), initiateAcc() and completeAcc() methods.
+def template MwaitExecDeclare {{
+    Fault execute(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+    Fault initiateAcc(%(CPU_exec_context)s *, Trace::InstRecord *) const;
+    Fault completeAcc(PacketPtr, %(CPU_exec_context)s *,
+                      Trace::InstRecord *) const;
+}};
+
+def template MwaitDeclare {{
+    class %(class_name)s : public %(base_class)s
+    {
+        public:
+        // Constructor; only the machine instruction is supplied by callers.
+        %(class_name)s(ExtMachInst machInst);
+        %(MwaitExecDeclare)s
+    };
+}};
+
+def template MwaitInitiateAcc {{
+    Fault %(class_name)s::initiateAcc(CPU_EXEC_CONTEXT * xc,
+            Trace::InstRecord * traceData) const
+    {
+        uint64_t m = 0;          // read destination (unused)
+        unsigned s = 0x8;        // size argument of the read
+        unsigned f = 0;          // flags argument (none set)
+        readMemTiming(xc, traceData, xc->getAddrMonitor()->vAddr, m, s, f);
+        return NoFault;  // NOTE(review): readMemTiming's result is not propagated
+    }
+}};
+
+def template MwaitCompleteAcc {{
+    Fault %(class_name)s::completeAcc(PacketPtr pkt, CPU_EXEC_CONTEXT *xc,
+            Trace::InstRecord *traceData) const
+    {
+        MicroHalt hltObj(machInst, mnemonic, 0x0);
+        if(xc->mwait(pkt)) {  // true when no wakeup has been recorded yet
+            hltObj.execute(xc, traceData);
+        }
+        return NoFault;
+    }
+}};
+
+output header {{
+    class MwaitInst : public X86ISA::X86StaticInst
+    {
+      public:
+        static const RegIndex foldOBit = 0;
+        /// Constructor; also sets the IsMemRef and IsLoad flags.
+        MwaitInst(const char *_mnemonic, ExtMachInst _machInst,
+                OpClass __opClass) :
+            X86ISA::X86StaticInst(_mnemonic, _machInst, __opClass)
+        {
+            flags[IsMemRef] = 1;
+            flags[IsLoad] = 1;
+        }
+
+        std::string generateDisassembly(Addr pc,
+                const SymbolTable *symtab) const;
+    };
+}};
+
+output decoder {{
+    // Emits the mnemonic, a space, then source register 0 printed at the
+    // instruction's operand size.
+    std::string MwaitInst::generateDisassembly(Addr PC,
+            const SymbolTable *symtab) const
+    {
+        std::stringstream out;
+
+        printMnemonic(out, mnemonic);
+        ccprintf(out, " ");
+        printReg(out, _srcRegIdx[0], machInst.opSize);
+
+        return out.str();
+    }
+}};
+
+def format MwaitInst(code, *opt_flags) {{
+    iop = InstObjParams(name, Name, 'MwaitInst', code, opt_flags)
+    header_output = MwaitDeclare.subst(iop)
+    decoder_output = BasicConstructor.subst(iop)
+    decode_block = BasicDecode.subst(iop)
+    exec_output = BasicExecute.subst(iop)
+    exec_output += MwaitInitiateAcc.subst(iop)  # adds initiateAcc()
+    exec_output += MwaitCompleteAcc.subst(iop)  # adds completeAcc()
+}};
+
index df29f6c73d0e7adac0feb397f2c6293f70a9650b..570f5e2f1b76f19a600124f877b3b1e1fa13348f 100644 (file)
@@ -102,6 +102,7 @@ DebugFlag('IntrControl')
 DebugFlag('O3PipeView')
 DebugFlag('PCEvent')
 DebugFlag('Quiesce')
+DebugFlag('Mwait')
 
 CompoundFlag('ExecAll', [ 'ExecEnable', 'ExecCPSeq', 'ExecEffAddr',
     'ExecFaulting', 'ExecFetchSeq', 'ExecOpClass', 'ExecRegDelta',
index ea4df2aa8e8120442fa7fe400d7663a421862f7d..2f4745ee3649c48c3daff76b400b99731457a3c2 100644 (file)
 #include "base/misc.hh"
 #include "base/output.hh"
 #include "base/trace.hh"
-#include "cpu/base.hh"
 #include "cpu/checker/cpu.hh"
+#include "cpu/base.hh"
 #include "cpu/cpuevent.hh"
 #include "cpu/profile.hh"
 #include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
 #include "debug/SyscallVerbose.hh"
+#include "mem/page_table.hh"
 #include "params/BaseCPU.hh"
 #include "sim/full_system.hh"
 #include "sim/process.hh"
@@ -123,7 +125,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
       _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
       _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
       interrupts(p->interrupts), profileEvent(NULL),
-      numThreads(p->numThreads), system(p->system)
+      numThreads(p->numThreads), system(p->system),
+      addressMonitor()
 {
     // if Python did not provide a valid ID, do it here
     if (_cpuId == -1 ) {
@@ -260,6 +263,63 @@ BaseCPU::~BaseCPU()
     delete[] comInstEventQueue;
 }
 
+// Arm the address monitor on a virtual address. The stored physical
+// address is reset here and filled in later by mwait()/mwaitAtomic().
+void
+BaseCPU::armMonitor(Addr address)
+{
+    addressMonitor.vAddr = address;
+    addressMonitor.pAddr = 0x0;
+    addressMonitor.armed = true;
+
+    DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+}
+
+bool
+BaseCPU::mwait(PacketPtr pkt)
+{
+    if(addressMonitor.gotWakeup == false) {
+        int block_size = cacheLineSize();
+        uint64_t mask = ~((uint64_t)(block_size - 1));
+
+        assert(pkt->req->hasPaddr());
+        addressMonitor.pAddr = pkt->getAddr() & mask;
+        addressMonitor.waiting = true;
+
+        DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+                addressMonitor.vAddr, addressMonitor.pAddr);
+        return true;
+    } else {
+        addressMonitor.gotWakeup = false;
+        return false;
+    }
+}
+
+void
+BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+{
+    Request req;
+    Addr addr = addressMonitor.vAddr;
+    int block_size = cacheLineSize();
+    uint64_t mask = ~((uint64_t)(block_size - 1));
+    int size = block_size;
+
+    //The address of the next line if it crosses a cache line boundary.
+    Addr secondAddr = roundDown(addr + size - 1, block_size);
+
+    if (secondAddr > addr)
+        size = secondAddr - addr;
+
+    req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
+
+    // translate to physical address
+    Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
+    assert(fault == NoFault);
+
+    addressMonitor.pAddr = req.getPaddr() & mask;
+    addressMonitor.waiting = true;
+
+    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            addressMonitor.vAddr, addressMonitor.pAddr);
+}
+
 void
 BaseCPU::init()
 {
@@ -618,6 +678,25 @@ BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
     comInstEventQueue[tid]->schedule(event, now + insts);
 }
 
+// Start disarmed with every field in a defined state. vAddr is read by the
+// mwait paths even if the monitor was never armed, so the address and value
+// fields must not be left indeterminate.
+AddressMonitor::AddressMonitor()
+    : armed(false), vAddr(0), pAddr(0), val(0),
+      waiting(false), gotWakeup(false)
+{
+}
+
+bool AddressMonitor::doMonitor(PacketPtr pkt) {
+    assert(pkt->req->hasPaddr());
+    if(armed && waiting) {
+        if(pAddr == pkt->getAddr()) {
+            DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
+                    pkt->getAddr());
+            waiting = false;
+            return true;
+        }
+    }
+    return false;
+}
+
 void
 BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
 {
index 75c8f72630d11cedc2e2272729cc7a5f8ca67434..3673a5f181ef190612af347f001a9cb0e4c2984f 100644 (file)
 #include "sim/insttracer.hh"
 #include "sim/probe/pmu.hh"
 #include "sim/system.hh"
+#include "debug/Mwait.hh"
 
+class BaseCPU;
 struct BaseCPUParams;
 class CheckerCPU;
 class ThreadContext;
 
+struct AddressMonitor
+{
+    AddressMonitor();
+    bool doMonitor(PacketPtr pkt);  // true if pkt matches the watched line
+
+    bool armed;     // set by BaseCPU::armMonitor()
+    Addr vAddr;     // virtual address handed to armMonitor()
+    Addr pAddr;     // line-aligned physical address, set by mwait/mwaitAtomic
+    uint64_t val;   // NOTE(review): not referenced in the visible code
+    bool waiting;   // 0=normal, 1=mwaiting
+    bool gotWakeup; // set by BaseSimpleCPU::wakeup(), consumed by mwait()
+};
+
 class CPUProgressEvent : public Event
 {
   protected:
@@ -536,6 +551,16 @@ class BaseCPU : public MemObject
     Stats::Scalar numCycles;
     Stats::Scalar numWorkItemsStarted;
     Stats::Scalar numWorkItemsCompleted;
+
+  private:
+    AddressMonitor addressMonitor;  // state used by monitor/mwait
+
+  public:
+    void armMonitor(Addr address);
+    bool mwait(PacketPtr pkt);
+    void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
+    AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
+    void atomicNotify(Addr address);  // NOTE(review): no definition visible
 };
 
 #endif // THE_ISA == NULL_ISA
index 289627c9a34387418fb8b9d20e1fcbd3cc5341b8..af4d238e278ba07c04ccb326ad896d77193cffd8 100644 (file)
@@ -853,6 +853,14 @@ class BaseDynInst : public ExecContext, public RefCounted
     /** Sets the number of consecutive store conditional failures. */
     void setStCondFailures(unsigned int sc_failures)
     { thread->storeCondFailures = sc_failures; }
+
+  public:
+    // monitor/mwait functions (forwarded to the owning CPU)
+    void armMonitor(Addr address) { cpu->armMonitor(address); }
+    bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+    void mwaitAtomic(ThreadContext *tc)
+    { return cpu->mwaitAtomic(tc, cpu->dtb); }
+    AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
 };
 
 template<class Impl>
index d684b142bbf49a59f291452f446386ef93e2aab7..49f44ff001d2a5839781b2fc191208d51aea33f9 100644 (file)
@@ -349,6 +349,13 @@ class CheckerCPU : public BaseCPU, public ExecContext
         this->dtb->demapPage(vaddr, asn);
     }
 
+    // monitor/mwait functions (forwarded to BaseCPU)
+    virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+    void mwaitAtomic(ThreadContext *tc)
+    { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
+    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+
     void demapInstPage(Addr vaddr, uint64_t asn)
     {
         this->itb->demapPage(vaddr, asn);
index c85a746ac3e15f60d260aa160b1adcff1e1b8ade..c65841db2152a0bc36ca1fccf3426d3bcb8379f5 100644 (file)
@@ -47,6 +47,7 @@
 #include "arch/registers.hh"
 #include "base/types.hh"
 #include "config/the_isa.hh"
+#include "cpu/base.hh"
 #include "cpu/static_inst_fwd.hh"
 #include "cpu/translation.hh"
 
@@ -243,6 +244,10 @@ class ExecContext {
      * Invalidate a page in the DTLB <i>and</i> ITLB.
      */
     virtual void demapPage(Addr vaddr, uint64_t asn) = 0;
+    virtual void armMonitor(Addr address) = 0;
+    virtual bool mwait(PacketPtr pkt) = 0;
+    virtual void mwaitAtomic(ThreadContext *tc) = 0;
+    virtual AddressMonitor *getAddrMonitor() = 0;
 
     /** @} */
 
index 18281e636cfa020cf146eb56a5636a5e26790f9d..c64cf9da4e61774796b80d7fb4a6b7cffc09995c 100644 (file)
@@ -602,3 +602,25 @@ InOrderDynInst::dump(std::string &outstring)
 
     outstring = s.str();
 }
+
+// Arm the owning CPU's address monitor on the given address.
+void
+InOrderDynInst::armMonitor(Addr address)
+{
+    cpu->armMonitor(address);
+}
+
+// Forward to the owning CPU's mwait() and hand back its result.
+bool
+InOrderDynInst::mwait(PacketPtr pkt)
+{
+    const bool shouldWait = cpu->mwait(pkt);
+    return shouldWait;
+}
+
+// Forward to the owning CPU's mwaitAtomic(), passing the TLB obtained from
+// cpu->getDTBPtr().
+void
+InOrderDynInst::mwaitAtomic(ThreadContext *tc)
+{
+    cpu->mwaitAtomic(tc, cpu->getDTBPtr());
+}
+
+AddressMonitor *
+InOrderDynInst::getAddrMonitor()
+{
+    return cpu->getCpuAddrMonitor();  // the owning CPU's monitor state
+}
index 369ebe2f47c84acd472a0a1585ec97e790c2d93e..ebb7bf91287b4cfadba8521463a8e499490c6a8e 100644 (file)
@@ -1077,6 +1077,13 @@ class InOrderDynInst : public ExecContext, public RefCounted
     void demapPage(Addr vaddr, uint64_t asn) {
         panic("demapPage unimplemented");
     }
+
+  public:
+    // monitor/mwait functions (defined out of line)
+    void armMonitor(Addr address);
+    bool mwait(PacketPtr pkt);
+    void mwaitAtomic(ThreadContext *tc);
+    AddressMonitor *getAddrMonitor();
 };
 
 
index f1143498e7bfdecff3d103e41e1fb2fb782a1217..41345d3bddf4dbe7b180bcbac1211a51cf7e249f 100644 (file)
@@ -340,6 +340,15 @@ class ExecContext : public ::ExecContext
                 - TheISA::Misc_Reg_Base, val);
         }
     }
+
+  public:
+    // monitor/mwait functions (forwarded to the CPU from getCpuPtr())
+    void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
+    bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+    void mwaitAtomic(ThreadContext *tc)
+    { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
+    AddressMonitor *getAddrMonitor()
+    { return getCpuPtr()->getCpuAddrMonitor(); }
 };
 
 }
index fd51cd123942f06e8709936a6ccb198950d797be..55ef04ffc215975cdab9e32585812ab5cb277e78 100644 (file)
@@ -117,6 +117,10 @@ template <class Impl>
 void
 FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
+    // X86 ISA: wake a CPU waiting in mwait when a snoop matches its line
+    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
     lsq->recvTimingSnoopReq(pkt);
 }
 
index 96cd071e489e664b5810d77277a0de34340f0021..09b7db867f75e3ac776b10e05ff725d1417a85d5 100644 (file)
@@ -162,11 +162,13 @@ class FullO3CPU : public BaseO3CPU
 
         /** Pointer to LSQ. */
         LSQ<Impl> *lsq;
+        FullO3CPU<Impl> *cpu;
 
       public:
         /** Default constructor. */
         DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
-            : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq)
+            : MasterPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq),
+              cpu(_cpu)
         { }
 
       protected:
index d6dbb92921a2bf2961f978b0dd7a196a510ebdf3..e98da3ea7da614a04cdb4358649a0c1b1bdeadc4 100644 (file)
@@ -272,6 +272,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
     DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
             pkt->cmdString());
 
+    // X86 ISA: Snooping an invalidation for monitor/mwait
+    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
+
     // if snoop invalidates, release any associated locks
     if (pkt->isInvalidate()) {
         DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
@@ -288,6 +294,12 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
     DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
             pkt->cmdString());
 
+    // X86 ISA: Snooping an invalidation for monitor/mwait
+    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
+    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
+
     // if snoop invalidates, release any associated locks
     if (pkt->isInvalidate()) {
         DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
index 60ab5399967f0cdf0e71e48714cf1edf45b7ea52..636e08899ad3c0510fc3c84d4c2d7e6200767893 100644 (file)
@@ -347,6 +347,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
 void
 BaseSimpleCPU::wakeup()
 {
+    getAddrMonitor()->gotWakeup = true;
+
     if (thread->status() != ThreadContext::Suspended)
         return;
 
index 8f38a33c8810a635cda954a05bee111b66bb5595..523bc9776ee61f855e0370efa2cc5ac154499720 100644 (file)
@@ -462,6 +462,14 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
 
   private:
     TheISA::PCState pred_pc;
+
+  public:
+    // monitor/mwait functions (forwarded to BaseCPU)
+    void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
+    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+    void mwaitAtomic(ThreadContext *tc)
+    { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
+    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
 };
 
 #endif // __CPU_SIMPLE_BASE_HH__
index 84a2c09fd1e558cd671fd3ad8f5ffb24c5f3c6d9..5bfc9799da7f816ace89a9ab5f3b766f0bdfd9a5 100644 (file)
@@ -58,6 +58,8 @@
 #include "sim/full_system.hh"
 #include "sim/system.hh"
 
+#include "debug/Mwait.hh"
+
 using namespace std;
 using namespace TheISA;
 
@@ -818,9 +820,21 @@ TimingSimpleCPU::updateCycleCounts()
 void
 TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
+    // X86 ISA: wake a CPU waiting in mwait when a snoop matches its line
+    if(cpu->getAddrMonitor()->doMonitor(pkt)) {
+        cpu->wakeup();
+    }
     TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
 }
 
+// Functional snoops get the same monitor check as timing snoops: a packet
+// that matches the monitored line wakes the CPU.
+void
+TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
+{
+    const bool monitorHit = cpu->getAddrMonitor()->doMonitor(pkt);
+    if (monitorHit)
+        cpu->wakeup();
+}
 
 bool
 TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
index 84c8f7418c15c8e5358aece72fee46140dfaff37..52eb6b1ba950ca20742915ceba7fb1c75b33839b 100644 (file)
@@ -228,11 +228,16 @@ class TimingSimpleCPU : public BaseSimpleCPU
          * a wakeup event on a cpu that is monitoring an address
          */
         virtual void recvTimingSnoopReq(PacketPtr pkt);
+        virtual void recvFunctionalSnoop(PacketPtr pkt);
 
         virtual bool recvTimingResp(PacketPtr pkt);
 
         virtual void recvRetry();
 
+        /** Reports that this port snoops; always true. */
+        virtual bool isSnooping() const { return true; }
+
         struct DTickEvent : public TickEvent
         {
             DTickEvent(TimingSimpleCPU *_cpu)