cpu: re-organizes the branch predictor structure.
[gem5.git] / src / cpu / base.cc
index 36caea79aa163b30e31d24b373f0e1255d50f5a1..b761e714e56d067a3b6e45ee6f34d221f56357ba 100644 (file)
@@ -13,6 +13,8 @@
  *
  * Copyright (c) 2002-2005 The Regents of The University of Michigan
  * Copyright (c) 2011 Regents of the University of California
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * Copyright (c) 2013 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "base/misc.hh"
 #include "base/output.hh"
 #include "base/trace.hh"
-#include "cpu/base.hh"
 #include "cpu/checker/cpu.hh"
+#include "cpu/base.hh"
 #include "cpu/cpuevent.hh"
 #include "cpu/profile.hh"
 #include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
 #include "debug/SyscallVerbose.hh"
+#include "mem/page_table.hh"
 #include "params/BaseCPU.hh"
 #include "sim/full_system.hh"
 #include "sim/process.hh"
@@ -115,13 +119,16 @@ CPUProgressEvent::description() const
 }
 
 BaseCPU::BaseCPU(Params *p, bool is_checker)
-    : MemObject(p), instCnt(0), _cpuId(p->cpu_id),
+    : MemObject(p), instCnt(0), _cpuId(p->cpu_id), _socketId(p->socket_id),
       _instMasterId(p->system->getMasterId(name() + ".inst")),
       _dataMasterId(p->system->getMasterId(name() + ".data")),
       _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
-      _switchedOut(p->switched_out),
+      _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
       interrupts(p->interrupts), profileEvent(NULL),
-      numThreads(p->numThreads), system(p->system)
+      numThreads(p->numThreads), system(p->system),
+      functionTraceStream(nullptr), currentFunctionStart(0),
+      currentFunctionEnd(0), functionEntryTick(0),
+      addressMonitor()
 {
     // if Python did not provide a valid ID, do it here
     if (_cpuId == -1) {
@@ -131,7 +138,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
     // add self to global list of CPUs
     cpuList.push_back(this);
 
-    DPRINTF(SyscallVerbose, "Constructing CPU with id %d\n", _cpuId);
+    DPRINTF(SyscallVerbose, "Constructing CPU with id %d, socket id %d\n",
+            _cpuId, _socketId);
 
     if (numThreads > maxThreadsPerCPU)
         maxThreadsPerCPU = numThreads;
@@ -147,10 +155,18 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
     //
     if (p->max_insts_any_thread != 0) {
         const char *cause = "a thread reached the max instruction count";
-        for (ThreadID tid = 0; tid < numThreads; ++tid) {
-            Event *event = new SimLoopExitEvent(cause, 0);
-            comInstEventQueue[tid]->schedule(event, p->max_insts_any_thread);
-        }
+        for (ThreadID tid = 0; tid < numThreads; ++tid)
+            scheduleInstStop(tid, p->max_insts_any_thread, cause);
+    }
+
+    // Set up instruction-count-based termination events for SimPoints.
+    // There is typically more than one action point, and Simulation.py
+    // is responsible for taking the necessary actions upon exiting the
+    // simulation loop.
+    if (!p->simpoint_start_insts.empty()) {
+        const char *cause = "simpoint starting point found";
+        for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i)
+            scheduleInstStop(0, p->simpoint_start_insts[i], cause);
     }
 
     if (p->max_insts_all_threads != 0) {
@@ -177,10 +193,8 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
     //
     if (p->max_loads_any_thread != 0) {
         const char *cause = "a thread reached the max load count";
-        for (ThreadID tid = 0; tid < numThreads; ++tid) {
-            Event *event = new SimLoopExitEvent(cause, 0);
-            comLoadEventQueue[tid]->schedule(event, p->max_loads_any_thread);
-        }
+        for (ThreadID tid = 0; tid < numThreads; ++tid)
+            scheduleLoadStop(tid, p->max_loads_any_thread, cause);
     }
 
     if (p->max_loads_all_threads != 0) {
@@ -251,6 +265,63 @@ BaseCPU::~BaseCPU()
     delete[] comInstEventQueue;
 }
 
+void
+BaseCPU::armMonitor(Addr address)
+{
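+    // Record the virtual address to watch; the physical address is
+    // resolved later by mwait()/mwaitAtomic().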
+    addressMonitor.armed = true;
+    addressMonitor.vAddr = address;
+    addressMonitor.pAddr = 0x0;
+    DPRINTF(Mwait, "Armed monitor (vAddr=0x%lx)\n", address);
+}
+
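+// Called when the thread executes an MWAIT. Records the monitored cache
+// line's physical address and starts waiting, returning true; returns
+// false (no need to wait) if a wakeup already arrived since the monitor
+// was armed.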
+bool
+BaseCPU::mwait(PacketPtr pkt)
+{
+    if (!addressMonitor.gotWakeup) {
+        int block_size = cacheLineSize();
+        uint64_t mask = ~((uint64_t)(block_size - 1));
+
+        assert(pkt->req->hasPaddr());
+        addressMonitor.pAddr = pkt->getAddr() & mask;
+        addressMonitor.waiting = true;
+
+        DPRINTF(Mwait, "mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+                addressMonitor.vAddr, addressMonitor.pAddr);
+        return true;
+    } else {
+        addressMonitor.gotWakeup = false;
+        return false;
+    }
+}
+
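+// Atomic-mode variant of mwait(): no packet carries the translated
+// address here, so translate the monitored line's address explicitly.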
+void
+BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+{
+    Request req;
+    Addr addr = addressMonitor.vAddr;
+    int block_size = cacheLineSize();
+    uint64_t mask = ~((uint64_t)(block_size - 1));
+    int size = block_size;
+
+    // If the access crosses a cache line boundary, truncate it at the
+    // start of the second line.
+    Addr secondAddr = roundDown(addr + size - 1, block_size);
+
+    if (secondAddr > addr)
+        size = secondAddr - addr;
+
+    req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
+
+    // translate to physical address
+    Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
+    assert(fault == NoFault);
+
+    addressMonitor.pAddr = req.getPaddr() & mask;
+    addressMonitor.waiting = true;
+
+    DPRINTF(Mwait, "mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            addressMonitor.vAddr, addressMonitor.pAddr);
+}
+
 void
 BaseCPU::init()
 {
@@ -274,6 +345,42 @@ BaseCPU::startup()
     }
 }
 
+ProbePoints::PMUUPtr
+BaseCPU::pmuProbePoint(const char *name)
+{
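+    // Helper: create a PMU probe point with the given name, registered
+    // with this CPU's probe manager.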
+    ProbePoints::PMUUPtr ptr;
+    ptr.reset(new ProbePoints::PMU(getProbeManager(), name));
+
+    return ptr;
+}
+
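+// Register the PMU probe points that external instrumentation can
+// attach listeners to.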
+void
+BaseCPU::regProbePoints()
+{
+    ppCycles = pmuProbePoint("Cycles");
+
+    ppRetiredInsts = pmuProbePoint("RetiredInsts");
+    ppRetiredLoads = pmuProbePoint("RetiredLoads");
+    ppRetiredStores = pmuProbePoint("RetiredStores");
+    ppRetiredBranches = pmuProbePoint("RetiredBranches");
+}
+
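+// Notify the PMU probe points for one committed instruction. Microops
+// count as a single retired instruction, notified on the last microop.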
+void
+BaseCPU::probeInstCommit(const StaticInstPtr &inst)
+{
+    if (!inst->isMicroop() || inst->isLastMicroop())
+        ppRetiredInsts->notify(1);
+
+    if (inst->isLoad())
+        ppRetiredLoads->notify(1);
+
+    if (inst->isStore())
+        ppRetiredStores->notify(1);
+
+    if (inst->isControl())
+        ppRetiredBranches->notify(1);
+}
 
 void
 BaseCPU::regStats()
@@ -312,7 +419,7 @@ BaseCPU::getMasterPort(const string &if_name, PortID idx)
     // Get the right port based on name. This applies to all the
     // subclasses of the base CPU and relies on their implementation
     // of getDataPort and getInstPort. In all cases these methods
-    // return a CpuPort pointer.
+    // return a MasterPort pointer.
     if (if_name == "dcache_port")
         return getDataPort();
     else if (if_name == "icache_port")
@@ -423,6 +530,8 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
             old_dtb_port->unbind();
             new_dtb_port->bind(slavePort);
         }
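+        // Hand over TLB state from the old CPU's TLBs to the new ones.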
+        newTC->getITBPtr()->takeOverFrom(oldTC->getITBPtr());
+        newTC->getDTBPtr()->takeOverFrom(oldTC->getDTBPtr());
 
         // Check whether or not we have to transfer CheckerCPU
         // objects over in the switch
@@ -438,6 +547,9 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
             BaseMasterPort *new_checker_dtb_port =
                 newChecker->getDTBPtr()->getMasterPort();
 
+            newChecker->getITBPtr()->takeOverFrom(oldChecker->getITBPtr());
+            newChecker->getDTBPtr()->takeOverFrom(oldChecker->getDTBPtr());
+
             // Move over any table walker ports if they exist for checker
             if (new_checker_itb_port) {
                 assert(!new_checker_itb_port->isConnected());
@@ -559,6 +671,44 @@ BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
     }
 }
 
+void
+BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
+{
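+    // The per-thread instruction event queue ticks once per committed
+    // instruction, so schedule the exit 'insts' instructions from now.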
+    const Tick now(comInstEventQueue[tid]->getCurTick());
+    Event *event(new LocalSimLoopExitEvent(cause, 0));
+
+    comInstEventQueue[tid]->schedule(event, now + insts);
+}
+
+AddressMonitor::AddressMonitor()
+{
+    armed = false;
+    waiting = false;
+    gotWakeup = false;
+}
+
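+// Check an observed access against the armed monitor; returns true if
+// the monitored line is touched and the core should wake up.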
+bool
+AddressMonitor::doMonitor(PacketPtr pkt)
+{
+    assert(pkt->req->hasPaddr());
+    if (armed && waiting) {
+        if (pAddr == pkt->getAddr()) {
+            DPRINTF(Mwait, "pAddr=0x%lx invalidated: waking up core\n",
+                    pkt->getAddr());
+            waiting = false;
+            return true;
+        }
+    }
+    return false;
+}
+
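+// As scheduleInstStop(), but counted on the per-thread committed-load
+// event queue.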
+void
+BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
+{
+    const Tick now(comLoadEventQueue[tid]->getCurTick());
+    Event *event(new LocalSimLoopExitEvent(cause, 0));
+
+    comLoadEventQueue[tid]->schedule(event, now + loads);
+}
+
 void
 BaseCPU::traceFunctionsInternal(Addr pc)
 {
@@ -585,24 +735,3 @@ BaseCPU::traceFunctionsInternal(Addr pc)
         functionEntryTick = curTick();
     }
 }
-
-bool
-BaseCPU::CpuPort::recvTimingResp(PacketPtr pkt)
-{
-    panic("BaseCPU doesn't expect recvTiming!\n");
-    return true;
-}
-
-void
-BaseCPU::CpuPort::recvRetry()
-{
-    panic("BaseCPU doesn't expect recvRetry!\n");
-}
-
-void
-BaseCPU::CpuPort::recvFunctionalSnoop(PacketPtr pkt)
-{
-    // No internal storage to update (in the general case). A CPU with
-    // internal storage, e.g. an LSQ that should be part of the
-    // coherent memory has to check against stored data.
-}