cpu: Add per-thread monitors
author Mitch Hayenga <mitch.hayenga@arm.com>
Wed, 30 Sep 2015 16:14:19 +0000 (11:14 -0500)
committer Mitch Hayenga <mitch.hayenga@arm.com>
Wed, 30 Sep 2015 16:14:19 +0000 (11:14 -0500)
Adds per-thread address monitors to support FullSystem SMT.

14 files changed:
src/cpu/base.cc
src/cpu/base.hh
src/cpu/base_dyn_inst.hh
src/cpu/checker/cpu.hh
src/cpu/minor/exec_context.hh
src/cpu/minor/fetch1.cc
src/cpu/minor/lsq.cc
src/cpu/o3/cpu.cc
src/cpu/simple/atomic.cc
src/cpu/simple/atomic.hh
src/cpu/simple/base.cc
src/cpu/simple/exec_context.hh
src/cpu/simple/timing.cc
src/cpu/simple/timing.hh

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 77ac5f2bbd654afff8fb4166399e20a409e0fbde..3b0809d09d5df35b0281fe071f1a875e5b830c5a 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -133,7 +133,7 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
       numThreads(p->numThreads), system(p->system),
       functionTraceStream(nullptr), currentFunctionStart(0),
       currentFunctionEnd(0), functionEntryTick(0),
-      addressMonitor()
+      addressMonitor(p->numThreads)
 {
     // if Python did not provide a valid ID, do it here
     if (_cpuId == -1 ) {
@@ -271,39 +271,48 @@ BaseCPU::~BaseCPU()
 }
 
 void
-BaseCPU::armMonitor(Addr address)
+BaseCPU::armMonitor(ThreadID tid, Addr address)
 {
-    addressMonitor.armed = true;
-    addressMonitor.vAddr = address;
-    addressMonitor.pAddr = 0x0;
-    DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+    assert(tid < numThreads);
+    AddressMonitor &monitor = addressMonitor[tid];
+
+    monitor.armed = true;
+    monitor.vAddr = address;
+    monitor.pAddr = 0x0;
+    DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
 }
 
 bool
-BaseCPU::mwait(PacketPtr pkt)
+BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
 {
-    if(addressMonitor.gotWakeup == false) {
+    assert(tid < numThreads);
+    AddressMonitor &monitor = addressMonitor[tid];
+
+    if(monitor.gotWakeup == false) {
         int block_size = cacheLineSize();
         uint64_t mask = ~((uint64_t)(block_size - 1));
 
         assert(pkt->req->hasPaddr());
-        addressMonitor.pAddr = pkt->getAddr() & mask;
-        addressMonitor.waiting = true;
+        monitor.pAddr = pkt->getAddr() & mask;
+        monitor.waiting = true;
 
-        DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
-                addressMonitor.vAddr, addressMonitor.pAddr);
+        DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
+                "line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
         return true;
     } else {
-        addressMonitor.gotWakeup = false;
+        monitor.gotWakeup = false;
         return false;
     }
 }
 
 void
-BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb)
 {
+    assert(tid < numThreads);
+    AddressMonitor &monitor = addressMonitor[tid];
+
     Request req;
-    Addr addr = addressMonitor.vAddr;
+    Addr addr = monitor.vAddr;
     int block_size = cacheLineSize();
     uint64_t mask = ~((uint64_t)(block_size - 1));
     int size = block_size;
@@ -320,11 +329,11 @@ BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
     Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
     assert(fault == NoFault);
 
-    addressMonitor.pAddr = req.getPaddr() & mask;
-    addressMonitor.waiting = true;
+    monitor.pAddr = req.getPaddr() & mask;
+    monitor.waiting = true;
 
-    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
-            addressMonitor.vAddr, addressMonitor.pAddr);
+    DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            tid, monitor.vAddr, monitor.pAddr);
 }
 
 void
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 3a10841e0dacf7b47b4b26962bbd3994e6c509da..0286ac45b009994eecbe1912478b296bca8f101d 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -559,14 +559,17 @@ class BaseCPU : public MemObject
     Stats::Scalar numWorkItemsCompleted;
 
   private:
-    AddressMonitor addressMonitor;
+    std::vector<AddressMonitor> addressMonitor;
 
   public:
-    void armMonitor(Addr address);
-    bool mwait(PacketPtr pkt);
-    void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
-    AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
-    void atomicNotify(Addr address);
+    void armMonitor(ThreadID tid, Addr address);
+    bool mwait(ThreadID tid, PacketPtr pkt);
+    void mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb);
+    AddressMonitor *getCpuAddrMonitor(ThreadID tid)
+    {
+        assert(tid < numThreads);
+        return &addressMonitor[tid];
+    }
 };
 
 #endif // THE_ISA == NULL_ISA
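diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index c2ef253a78ac70fe6115f8922d07da6611493d09..77117b8920edcae6420eea4d5f3aec74e1f8bdc5 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh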
@@ -863,11 +863,12 @@ class BaseDynInst : public ExecContext, public RefCounted
 
   public:
     // monitor/mwait funtions
-    void armMonitor(Addr address) { cpu->armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+    void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
+    bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
     void mwaitAtomic(ThreadContext *tc)
-    { return cpu->mwaitAtomic(tc, cpu->dtb); }
-    AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
+    { return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
+    AddressMonitor *getAddrMonitor()
+    { return cpu->getCpuAddrMonitor(threadNumber); }
 };
 
 template<class Impl>
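diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index a363b6d0f6694c21716b166ce4988fcd9d3adb6d..69f47894b880f0be391297e4ced5e9b332ebb982 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh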
@@ -350,11 +350,11 @@ class CheckerCPU : public BaseCPU, public ExecContext
     }
 
     // monitor/mwait funtions
-    virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+    virtual void armMonitor(Addr address) { BaseCPU::armMonitor(0, address); }
+    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(0, pkt); }
     void mwaitAtomic(ThreadContext *tc)
-    { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
-    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+    { return BaseCPU::mwaitAtomic(0, tc, thread->dtb); }
+    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(0); }
 
     void demapInstPage(Addr vaddr, uint64_t asn)
     {
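diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index 3e4ea5ea9ed46e5b5c732c8a1a3f9ff273432927..625d2b877688e9ecf60efbc3df237e07abd430bf 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh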
@@ -343,12 +343,12 @@ class ExecContext : public ::ExecContext
 
   public:
     // monitor/mwait funtions
-    void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+    void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
+    bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
     void mwaitAtomic(ThreadContext *tc)
-    { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
+    { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
     AddressMonitor *getAddrMonitor()
-    { return getCpuPtr()->getCpuAddrMonitor(); }
+    { return getCpuPtr()->getCpuAddrMonitor(0); }
 };
 
 }
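diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index 81fc99d37ac4b822658898e66de453c3f949bd66..84aaf02f580f21269f3083d5ad98fec237fd9b19 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc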
@@ -135,7 +135,8 @@ Fetch1::fetchLine()
         "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
         request_id, aligned_pc, pc, line_offset, request_size);
 
-    request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
+    request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(),
+                                      /* thread id */ 0);
     request->request.setVirt(0 /* asid */,
         aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
         /* I've no idea why we need the PC, but give it */
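diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index 376e8a0ff3da48677611a9b03c7f5399de628fcc..e644951f88d7762fc50928b801fe55173a3b6026 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc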
@@ -1501,7 +1501,8 @@ LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
     if (inst->traceData)
         inst->traceData->setMem(addr, size, flags);
 
-    request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
+    int cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
+    request->request.setThreadContext(cid, /* thread id */ 0);
     request->request.setVirt(0 /* asid */,
         addr, size, flags, cpu.dataMasterId(),
         /* I've no idea why we need the PC, but give it */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 026907a9410386b667c0165e02110d7c9f56052c..4ab0048173d1225c2db406aaedb611f2df78225a 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -118,9 +118,10 @@ template <class Impl>
 void
 FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
-    // X86 ISA: Snooping an invalidation for monitor/mwait
-    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
     lsq->recvTimingSnoopReq(pkt);
 }
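diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 6690c1da62316ef69e27a9815bb000ec0ef7aea6..2d9da25875e58b1131b5fbc89890b6e85f4ae4e5 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc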
@@ -86,9 +86,10 @@ AtomicSimpleCPU::init()
 {
     BaseSimpleCPU::init();
 
-    ifetch_req.setThreadContext(_cpuId, 0);
-    data_read_req.setThreadContext(_cpuId, 0);
-    data_write_req.setThreadContext(_cpuId, 0);
+    int cid = threadContexts[0]->contextId();
+    ifetch_req.setThreadContext(cid, 0);
+    data_read_req.setThreadContext(cid, 0);
+    data_write_req.setThreadContext(cid, 0);
 }
 
 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -130,6 +131,24 @@ AtomicSimpleCPU::drain()
     }
 }
 
+void
+AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
+{
+    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+            pkt->cmdString());
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (tid != sender) {
+            if(getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                wakeup();
+            }
+
+            TheISA::handleLockedSnoop(threadInfo[tid]->thread,
+                                      pkt, dcachePort.cacheBlockMask);
+        }
+    }
+}
+
 void
 AtomicSimpleCPU::drainResume()
 {
@@ -265,8 +284,11 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
     AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 
     // if snoop invalidates, release any associated locks
@@ -289,8 +311,10 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
     AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 
     // if snoop invalidates, release any associated locks
@@ -460,6 +484,9 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                         system->getPhysMem().access(&pkt);
                     else
                         dcache_latency += dcachePort.sendAtomic(&pkt);
+
+                    // Notify other threads on this CPU of write
+                    threadSnoop(&pkt, curThread);
                 }
                 dcache_access = true;
                 assert(!pkt.isError());
@@ -516,9 +543,11 @@ AtomicSimpleCPU::tick()
 
     // Set memroy request ids to current thread
     if (numThreads > 1) {
-        ifetch_req.setThreadContext(_cpuId, curThread);
-        data_read_req.setThreadContext(_cpuId, curThread);
-        data_write_req.setThreadContext(_cpuId, curThread);
+        ContextID cid = threadContexts[curThread]->contextId();
+
+        ifetch_req.setThreadContext(cid, curThread);
+        data_read_req.setThreadContext(cid, curThread);
+        data_write_req.setThreadContext(cid, curThread);
     }
 
     SimpleExecContext& t_info = *threadInfo[curThread];
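diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 76ee9f897463305394837d02640286b33661f86c..2bea12ab2c3d3fea96fdf3fbf2d3b99e32f6b0f1 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh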
@@ -186,6 +186,9 @@ class AtomicSimpleCPU : public BaseSimpleCPU
     /** Return a reference to the instruction port. */
     virtual MasterPort &getInstPort() { return icachePort; }
 
+    /** Perform snoop for other cpu-local thread contexts. */
+    void threadSnoop(PacketPtr pkt, ThreadID sender);
+
   public:
 
     DrainState drain() M5_ATTR_OVERRIDE;
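diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 673cadd77bf6c34d87fbdae00d2cbcbfa39c30dc..6e8845bf7dbe730c4a7ef47889685df10991e943 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc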
@@ -418,9 +418,8 @@ BaseSimpleCPU::dbg_vtophys(Addr addr)
 void
 BaseSimpleCPU::wakeup()
 {
-    getCpuAddrMonitor()->gotWakeup = true;
-
     for (ThreadID tid = 0; tid < numThreads; tid++) {
+        getCpuAddrMonitor(tid)->gotWakeup = true;
         if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
             DPRINTF(Quiesce,"Suspended Processor awoke\n");
             threadInfo[tid]->thread->activate();
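diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index f474cc358b5fe9fe8d85224201500f8376f1d62a..591cf822712c9e1390d7ef527ff9bc87c0418871 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh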
@@ -376,22 +376,22 @@ class SimpleExecContext : public ExecContext {
 
     void armMonitor(Addr address) M5_ATTR_OVERRIDE
     {
-        cpu->armMonitor(address);
+        cpu->armMonitor(thread->threadId(), address);
     }
 
     bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
     {
-        return cpu->mwait(pkt);
+        return cpu->mwait(thread->threadId(), pkt);
     }
 
     void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
     {
-        cpu->mwaitAtomic(tc, thread->dtb);
+        cpu->mwaitAtomic(thread->threadId(), tc, thread->dtb);
     }
 
     AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
     {
-        return cpu->getCpuAddrMonitor();
+        return cpu->getCpuAddrMonitor(thread->threadId());
     }
 
 #if THE_ISA == MIPS_ISA
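diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 487da36eaa84c99cfc2e435a523fe49c715fdca1..f3241f7e57dc55197981344d7666c7296ab65965 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc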
@@ -302,6 +302,7 @@ TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
         if (do_access) {
             dcache_pkt = pkt;
             handleWritePacket();
+            threadSnoop(pkt, curThread);
         } else {
             _status = DcacheWaitResponse;
             completeDataAccess(pkt);
@@ -538,6 +539,19 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
     return NoFault;
 }
 
+void
+TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
+{
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (tid != sender) {
+            if(getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                wakeup();
+            }
+            TheISA::handleLockedSnoop(threadInfo[tid]->thread, pkt,
+                    dcachePort.cacheBlockMask);
+        }
+    }
+}
 
 void
 TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
@@ -849,9 +863,10 @@ TimingSimpleCPU::updateCycleCounts()
 void
 TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
-    // X86 ISA: Snooping an invalidation for monitor/mwait
-    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 
     for (auto &t_info : cpu->threadInfo) {
@@ -862,9 +877,10 @@ TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 void
 TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
 {
-    // X86 ISA: Snooping an invalidation for monitor/mwait
-    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if(cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 }
 
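diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index d409ac5d2191cd78af4cd87b3f9a8f1f5ae88799..f1cc09e42014bb669972a2aac31f3e261431e026 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh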
@@ -132,6 +132,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
     };
     FetchTranslation fetchTranslation;
 
+    void threadSnoop(PacketPtr pkt, ThreadID sender);
     void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
     void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
                        uint8_t *data, bool read);