base,cpu,sim: Stop including arch/vtophys.hh when not using vtophys.
[gem5.git] / src / cpu / base.cc
index 893b0e06b845153d2b649598b594a78db48ea9eb..c0788db96e4cefe44d400cf17da0f9605d4fb9ae 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012,2016-2017, 2019 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -13,6 +13,8 @@
  *
  * Copyright (c) 2002-2005 The Regents of The University of Michigan
  * Copyright (c) 2011 Regents of the University of California
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * Copyright (c) 2013 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Steve Reinhardt
- *          Nathan Binkert
- *          Rick Strong
  */
 
+#include "cpu/base.hh"
+
 #include <iostream>
 #include <sstream>
 #include <string>
 
-#include "arch/tlb.hh"
-#include "base/loader/symtab.hh"
+#include "arch/generic/tlb.hh"
 #include "base/cprintf.hh"
-#include "base/misc.hh"
+#include "base/loader/symtab.hh"
+#include "base/logging.hh"
 #include "base/output.hh"
 #include "base/trace.hh"
-#include "cpu/base.hh"
 #include "cpu/checker/cpu.hh"
 #include "cpu/cpuevent.hh"
 #include "cpu/profile.hh"
 #include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
 #include "debug/SyscallVerbose.hh"
+#include "debug/Thread.hh"
+#include "mem/page_table.hh"
 #include "params/BaseCPU.hh"
+#include "sim/clocked_object.hh"
 #include "sim/full_system.hh"
 #include "sim/process.hh"
 #include "sim/sim_events.hh"
@@ -90,8 +93,16 @@ void
 CPUProgressEvent::process()
 {
     Counter temp = cpu->totalOps();
+
+    if (_repeatEvent)
+      cpu->schedule(this, curTick() + _interval);
+
+    if (cpu->switchedOut()) {
+      return;
+    }
+
 #ifndef NDEBUG
-    double ipc = double(temp - lastNumInst) / (_interval / cpu->ticks(1));
+    double ipc = double(temp - lastNumInst) / (_interval / cpu->clockPeriod());
 
     DPRINTFN("%s progress event, total committed:%i, progress insts committed: "
              "%lli, IPC: %0.8d\n", cpu->name(), temp, temp - lastNumInst,
@@ -103,9 +114,6 @@ CPUProgressEvent::process()
             temp - lastNumInst);
 #endif
     lastNumInst = temp;
-
-    if (_repeatEvent)
-        cpu->schedule(this, curTick() + _interval);
 }
 
 const char *
@@ -115,15 +123,22 @@ CPUProgressEvent::description() const
 }
 
 BaseCPU::BaseCPU(Params *p, bool is_checker)
-    : MemObject(p), clock(p->clock), instCnt(0), _cpuId(p->cpu_id),
-      _instMasterId(p->system->getMasterId(name() + ".inst")),
-      _dataMasterId(p->system->getMasterId(name() + ".data")),
-      interrupts(p->interrupts),
+    : ClockedObject(p), instCnt(0), _cpuId(p->cpu_id), _socketId(p->socket_id),
+      _instMasterId(p->system->getMasterId(this, "inst")),
+      _dataMasterId(p->system->getMasterId(this, "data")),
+      _taskId(ContextSwitchTaskId::Unknown), _pid(invldPid),
+      _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
+      interrupts(p->interrupts), profileEvent(NULL),
       numThreads(p->numThreads), system(p->system),
-      phase(p->phase)
+      previousCycle(0), previousState(CPU_STATE_SLEEP),
+      functionTraceStream(nullptr), currentFunctionStart(0),
+      currentFunctionEnd(0), functionEntryTick(0),
+      addressMonitor(p->numThreads),
+      syscallRetryLatency(p->syscallRetryLatency),
+      pwrGatingLatency(p->pwr_gating_latency),
+      powerGatingOnIdle(p->power_gating_on_idle),
+      enterPwrGatingEvent([this]{ enterPwrGating(); }, name())
 {
-//    currentTick = curTick();
-
     // if Python did not provide a valid ID, do it here
     if (_cpuId == -1 ) {
         _cpuId = cpuList.size();
@@ -132,77 +147,16 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
     // add self to global list of CPUs
     cpuList.push_back(this);
 
-    DPRINTF(SyscallVerbose, "Constructing CPU with id %d\n", _cpuId);
+    DPRINTF(SyscallVerbose, "Constructing CPU with id %d, socket id %d\n",
+                _cpuId, _socketId);
 
     if (numThreads > maxThreadsPerCPU)
         maxThreadsPerCPU = numThreads;
 
-    // allocate per-thread instruction-based event queues
-    comInstEventQueue = new EventQueue *[numThreads];
-    for (ThreadID tid = 0; tid < numThreads; ++tid)
-        comInstEventQueue[tid] =
-            new EventQueue("instruction-based event queue");
-
-    //
-    // set up instruction-count-based termination events, if any
-    //
-    if (p->max_insts_any_thread != 0) {
-        const char *cause = "a thread reached the max instruction count";
-        for (ThreadID tid = 0; tid < numThreads; ++tid) {
-            Event *event = new SimLoopExitEvent(cause, 0);
-            comInstEventQueue[tid]->schedule(event, p->max_insts_any_thread);
-        }
-    }
-
-    if (p->max_insts_all_threads != 0) {
-        const char *cause = "all threads reached the max instruction count";
-
-        // allocate & initialize shared downcounter: each event will
-        // decrement this when triggered; simulation will terminate
-        // when counter reaches 0
-        int *counter = new int;
-        *counter = numThreads;
-        for (ThreadID tid = 0; tid < numThreads; ++tid) {
-            Event *event = new CountedExitEvent(cause, *counter);
-            comInstEventQueue[tid]->schedule(event, p->max_insts_all_threads);
-        }
-    }
-
-    // allocate per-thread load-based event queues
-    comLoadEventQueue = new EventQueue *[numThreads];
-    for (ThreadID tid = 0; tid < numThreads; ++tid)
-        comLoadEventQueue[tid] = new EventQueue("load-based event queue");
-
-    //
-    // set up instruction-count-based termination events, if any
-    //
-    if (p->max_loads_any_thread != 0) {
-        const char *cause = "a thread reached the max load count";
-        for (ThreadID tid = 0; tid < numThreads; ++tid) {
-            Event *event = new SimLoopExitEvent(cause, 0);
-            comLoadEventQueue[tid]->schedule(event, p->max_loads_any_thread);
-        }
-    }
-
-    if (p->max_loads_all_threads != 0) {
-        const char *cause = "all threads reached the max load count";
-        // allocate & initialize shared downcounter: each event will
-        // decrement this when triggered; simulation will terminate
-        // when counter reaches 0
-        int *counter = new int;
-        *counter = numThreads;
-        for (ThreadID tid = 0; tid < numThreads; ++tid) {
-            Event *event = new CountedExitEvent(cause, *counter);
-            comLoadEventQueue[tid]->schedule(event, p->max_loads_all_threads);
-        }
-    }
-
     functionTracingEnabled = false;
     if (p->function_trace) {
         const string fname = csprintf("ftrace.%s", name());
-        functionTraceStream = simout.find(fname);
-        if (!functionTraceStream)
-            functionTraceStream = simout.create(fname);
+        functionTraceStream = simout.findOrCreate(fname)->stream();
 
         currentFunctionStart = currentFunctionEnd = 0;
         functionEntryTick = p->function_trace_start;
@@ -210,29 +164,35 @@ BaseCPU::BaseCPU(Params *p, bool is_checker)
         if (p->function_trace_start == 0) {
             functionTracingEnabled = true;
         } else {
-            typedef EventWrapper<BaseCPU, &BaseCPU::enableFunctionTrace> wrap;
-            Event *event = new wrap(this, true);
+            Event *event = new EventFunctionWrapper(
+                [this]{ enableFunctionTrace(); }, name(), true);
             schedule(event, p->function_trace_start);
         }
     }
 
     // The interrupts should always be present unless this CPU is
     // switched in later or in case it is a checker CPU
-    if (!params()->defer_registration && !is_checker) {
-        if (interrupts) {
-            interrupts->setCPU(this);
-        } else {
-            fatal("CPU %s has no interrupt controller.\n"
-                  "Ensure createInterruptController() is called.\n", name());
-        }
+    if (!params()->switched_out && !is_checker) {
+        fatal_if(interrupts.size() != numThreads,
+                 "CPU %s has %i interrupt controllers, but is expecting one "
+                 "per thread (%i)\n",
+                 name(), interrupts.size(), numThreads);
+        for (ThreadID tid = 0; tid < numThreads; tid++)
+            interrupts[tid]->setCPU(this);
     }
 
     if (FullSystem) {
-        profileEvent = NULL;
         if (params()->profile)
-            profileEvent = new ProfileEvent(this, params()->profile);
+            profileEvent = new EventFunctionWrapper(
+                [this]{ processProfileEvent(); },
+                name());
     }
     tracer = params()->tracer;
+
+    if (params()->isa.size() != numThreads) {
+        fatal("Number of ISAs (%i) assigned to the CPU does not equal number "
+              "of threads (%i).\n", params()->isa.size(), numThreads);
+    }
 }
 
 void
@@ -244,36 +204,187 @@ BaseCPU::enableFunctionTrace()
 BaseCPU::~BaseCPU()
 {
     delete profileEvent;
-    delete[] comLoadEventQueue;
-    delete[] comInstEventQueue;
+}
+
+/**
+ * Arm the address monitor belonging to a hardware thread.
+ *
+ * @param tid     Thread whose monitor is armed; must be below numThreads.
+ * @param address Virtual address to watch.
+ */
+void
+BaseCPU::armMonitor(ThreadID tid, Addr address)
+{
+    assert(tid < numThreads);
+
+    AddressMonitor &mon = addressMonitor[tid];
+    mon.armed = true;
+    mon.vAddr = address;
+    // Only the virtual address is recorded here; the physical address is
+    // reset and gets filled in later by mwait()/mwaitAtomic().
+    mon.pAddr = 0x0;
+
+    DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
+}
+
+/**
+ * Start waiting on the monitored cache line of a thread.
+ *
+ * @param tid Thread whose monitor is used; must be below numThreads.
+ * @param pkt Packet carrying the physical address to wait on.
+ * @return true if the thread now waits on the line, false if a pending
+ *         wakeup was consumed instead.
+ */
+bool
+BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
+{
+    assert(tid < numThreads);
+    AddressMonitor &mon = addressMonitor[tid];
+
+    // A wakeup has already been recorded: consume it and do not wait.
+    if (mon.gotWakeup) {
+        mon.gotWakeup = false;
+        return false;
+    }
+
+    // Monitoring works at cache line granularity, so drop the offset bits.
+    const int line_bytes = cacheLineSize();
+    const uint64_t line_mask = ~((uint64_t)(line_bytes - 1));
+
+    assert(pkt->req->hasPaddr());
+    mon.pAddr = pkt->getAddr() & line_mask;
+    mon.waiting = true;
+
+    DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
+            "line's paddr=0x%lx)\n", tid, mon.vAddr, mon.pAddr);
+    return true;
+}
+
+/**
+ * Variant of mwait() that performs the address translation itself, using
+ * translateAtomic() on the given TLB, and then waits on the cache line
+ * containing the translated address.
+ *
+ * @param tid Thread whose monitor is used; must be below numThreads.
+ * @param tc  Thread context handed to the translation.
+ * @param dtb TLB used to translate the monitored virtual address.
+ */
+void
+BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseTLB *dtb)
+{
+    assert(tid < numThreads);
+    AddressMonitor &mon = addressMonitor[tid];
+
+    RequestPtr req = std::make_shared<Request>();
+
+    const Addr vaddr = mon.vAddr;
+    const int line_bytes = cacheLineSize();
+    const uint64_t line_mask = ~((uint64_t)(line_bytes - 1));
+
+    // Start from a full line. If that would reach into the next cache
+    // line, shrink the request so it stops at that line's start address.
+    int req_bytes = line_bytes;
+    const Addr next_line = roundDown(vaddr + req_bytes - 1, line_bytes);
+    if (next_line > vaddr)
+        req_bytes = next_line - vaddr;
+
+    req->setVirt(vaddr, req_bytes, 0x0, dataMasterId(), tc->instAddr());
+
+    // Translate to a physical address; the translation must not fault.
+    Fault fault = dtb->translateAtomic(req, tc, BaseTLB::Read);
+    assert(fault == NoFault);
+
+    mon.pAddr = req->getPaddr() & line_mask;
+    mon.waiting = true;
+
+    DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            tid, mon.vAddr, mon.pAddr);
 }
 
 void
 BaseCPU::init()
 {
-    if (!params()->defer_registration)
+    // Set up instruction-count-based termination events, if any. This needs
+    // to happen after threadContexts has been constructed.
+    if (params()->max_insts_any_thread != 0) {
+        const char *cause = "a thread reached the max instruction count";
+        for (ThreadID tid = 0; tid < numThreads; ++tid)
+            scheduleInstStop(tid, params()->max_insts_any_thread, cause);
+    }
+
+    // Set up instruction-count-based termination events for SimPoints.
+    // Typically, there is more than one action point.
+    // Simulation.py is responsible for taking the necessary actions upon
+    // exiting the simulation loop.
+    if (!params()->simpoint_start_insts.empty()) {
+        const char *cause = "simpoint starting point found";
+        for (size_t i = 0; i < params()->simpoint_start_insts.size(); ++i)
+            scheduleInstStop(0, params()->simpoint_start_insts[i], cause);
+    }
+
+    if (params()->max_insts_all_threads != 0) {
+        const char *cause = "all threads reached the max instruction count";
+
+        // allocate & initialize shared downcounter: each event will
+        // decrement this when triggered; simulation will terminate
+        // when counter reaches 0
+        int *counter = new int;
+        *counter = numThreads;
+        for (ThreadID tid = 0; tid < numThreads; ++tid) {
+            Event *event = new CountedExitEvent(cause, *counter);
+            threadContexts[tid]->scheduleInstCountEvent(
+                    event, params()->max_insts_all_threads);
+        }
+    }
+
+    if (!params()->switched_out) {
         registerThreadContexts();
+
+        verifyMemoryMode();
+    }
 }
 
 void
 BaseCPU::startup()
 {
     if (FullSystem) {
-        if (!params()->defer_registration && profileEvent)
+        if (!params()->switched_out && profileEvent)
             schedule(profileEvent, curTick());
     }
 
     if (params()->progress_interval) {
-        Tick num_ticks = ticks(params()->progress_interval);
-
-        new CPUProgressEvent(this, num_ticks);
+        new CPUProgressEvent(this, params()->progress_interval);
     }
+
+    if (_switchedOut)
+        ClockedObject::pwrState(Enums::PwrState::OFF);
+
+    // Assume the CPU starts operating instantaneously, without latency
+    if (ClockedObject::pwrState() == Enums::PwrState::UNDEFINED)
+        ClockedObject::pwrState(Enums::PwrState::ON);
+
 }
 
+/**
+ * Create a PMU probe point registered with this object's probe manager.
+ *
+ * @param name Name of the new probe point.
+ * @return Owning pointer to the freshly created probe point.
+ */
+ProbePoints::PMUUPtr
+BaseCPU::pmuProbePoint(const char *name)
+{
+    return ProbePoints::PMUUPtr(
+        new ProbePoints::PMU(getProbeManager(), name));
+}
+
+/**
+ * Create the probe points exported by the base CPU.
+ */
+void
+BaseCPU::regProbePoints()
+{
+    // Cycle counters.
+    ppAllCycles = pmuProbePoint("Cycles");
+    ppActiveCycles = pmuProbePoint("ActiveCycles");
+
+    // Retired-instruction counters; these are notified from
+    // probeInstCommit().
+    ppRetiredInsts = pmuProbePoint("RetiredInsts");
+    ppRetiredInstsPC = pmuProbePoint("RetiredInstsPC");
+    ppRetiredLoads = pmuProbePoint("RetiredLoads");
+    ppRetiredStores = pmuProbePoint("RetiredStores");
+    ppRetiredBranches = pmuProbePoint("RetiredBranches");
+
+    // Unlike the PMU probes above, this one carries a bool argument.
+    ppSleeping = new ProbePointArg<bool>(getProbeManager(), "Sleeping");
+}
+
+/**
+ * Notify the retired-instruction probe points about a committed
+ * instruction.
+ *
+ * @param inst The committed (micro-)instruction.
+ * @param pc   Value reported through the RetiredInstsPC probe.
+ */
+void
+BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc)
+{
+    // Count an instruction once: either it is not a micro-op at all, or
+    // it is the last micro-op of its sequence.
+    const bool unfinished_microop =
+        inst->isMicroop() && !inst->isLastMicroop();
+    if (!unfinished_microop) {
+        ppRetiredInsts->notify(1);
+        ppRetiredInstsPC->notify(pc);
+    }
+
+    if (inst->isLoad()) {
+        ppRetiredLoads->notify(1);
+    }
+
+    // Atomic operations are accounted as stores.
+    if (inst->isStore() || inst->isAtomic()) {
+        ppRetiredStores->notify(1);
+    }
+
+    if (inst->isControl()) {
+        ppRetiredBranches->notify(1);
+    }
+}
 
 void
 BaseCPU::regStats()
 {
+    ClockedObject::regStats();
+
     using namespace Stats;
 
     numCycles
@@ -302,66 +413,64 @@ BaseCPU::regStats()
         threadContexts[0]->regStats(name());
 }
 
-MasterPort &
-BaseCPU::getMasterPort(const string &if_name, int idx)
+Port &
+BaseCPU::getPort(const string &if_name, PortID idx)
 {
     // Get the right port based on name. This applies to all the
     // subclasses of the base CPU and relies on their implementation
-    // of getDataPort and getInstPort. In all cases there methods
-    // return a CpuPort pointer.
+    // of getDataPort and getInstPort.
     if (if_name == "dcache_port")
         return getDataPort();
     else if (if_name == "icache_port")
         return getInstPort();
     else
-        return MemObject::getMasterPort(if_name, idx);
-}
-
-Tick
-BaseCPU::nextCycle()
-{
-    Tick next_tick = curTick() - phase + clock - 1;
-    next_tick -= (next_tick % clock);
-    next_tick += phase;
-    return next_tick;
-}
-
-Tick
-BaseCPU::nextCycle(Tick begin_tick)
-{
-    Tick next_tick = begin_tick;
-    if (next_tick % clock != 0)
-        next_tick = next_tick - (next_tick % clock) + clock;
-    next_tick += phase;
-
-    assert(next_tick >= curTick());
-    return next_tick;
+        return ClockedObject::getPort(if_name, idx);
 }
 
 void
 BaseCPU::registerThreadContexts()
 {
+    assert(system->multiThread || numThreads == 1);
+
     ThreadID size = threadContexts.size();
     for (ThreadID tid = 0; tid < size; ++tid) {
         ThreadContext *tc = threadContexts[tid];
 
-        /** This is so that contextId and cpuId match where there is a
-         * 1cpu:1context relationship.  Otherwise, the order of registration
-         * could affect the assignment and cpu 1 could have context id 3, for
-         * example.  We may even want to do something like this for SMT so that
-         * cpu 0 has the lowest thread contexts and cpu N has the highest, but
-         * I'll just do this for now
-         */
-        if (numThreads == 1)
-            tc->setContextId(system->registerThreadContext(tc, _cpuId));
-        else
+        if (system->multiThread) {
             tc->setContextId(system->registerThreadContext(tc));
+        } else {
+            tc->setContextId(system->registerThreadContext(tc, _cpuId));
+        }
 
         if (!FullSystem)
             tc->getProcessPtr()->assignThreadContext(tc->contextId());
     }
 }
 
+/**
+ * Cancel the pending power gating event, if one is scheduled.
+ */
+void
+BaseCPU::deschedulePowerGatingEvent()
+{
+    // Nothing to cancel when the event is not pending.
+    if (!enterPwrGatingEvent.scheduled())
+        return;
+
+    deschedule(enterPwrGatingEvent);
+}
+
+/**
+ * Schedule the power gating event, provided that no thread context is
+ * active, the CPU is clock gated and power gating on idle is enabled.
+ */
+void
+BaseCPU::schedulePowerGatingEvent()
+{
+    // Any active thread keeps the CPU from being power gated.
+    for (auto ctx : threadContexts) {
+        if (ctx->status() == ThreadContext::Active) {
+            return;
+        }
+    }
+
+    const bool clk_gated =
+        ClockedObject::pwrState() == Enums::PwrState::CLK_GATED;
+    if (!clk_gated || !powerGatingOnIdle)
+        return;
+
+    // Power gate once the CPU has been clock gated for pwrGatingLatency
+    // cycles. The event must not be pending already.
+    assert(!enterPwrGatingEvent.scheduled());
+    schedule(enterPwrGatingEvent, clockEdge(pwrGatingLatency));
+}
 
 int
 BaseCPU::findContext(ThreadContext *tc)
@@ -374,11 +483,72 @@ BaseCPU::findContext(ThreadContext *tc)
     return 0;
 }
 
+/**
+ * Notification that a thread context has been activated: cancel any
+ * pending power gating and mark the CPU as powered on.
+ *
+ * @param thread_num Index of the activated thread context.
+ */
+void
+BaseCPU::activateContext(ThreadID thread_num)
+{
+    DPRINTF(Thread, "activate contextId %d\n",
+            threadContexts[thread_num]->contextId());
+
+    // The CPU is waking up, so a pending power gating request is moot.
+    if (enterPwrGatingEvent.scheduled()) {
+        deschedule(enterPwrGatingEvent);
+    }
+
+    // With at least one thread running, the CPU power state is ON.
+    ClockedObject::pwrState(Enums::PwrState::ON);
+
+    updateCycleCounters(CPU_STATE_WAKEUP);
+}
+
+/**
+ * Notification that a thread context has been suspended. Once every
+ * thread context is suspended the CPU is clock gated and, if enabled,
+ * power gating is scheduled.
+ *
+ * @param thread_num Index of the suspended thread context.
+ */
+void
+BaseCPU::suspendContext(ThreadID thread_num)
+{
+    DPRINTF(Thread, "suspend contextId %d\n",
+            threadContexts[thread_num]->contextId());
+
+    // Nothing more to do while at least one thread is not suspended.
+    for (auto ctx : threadContexts) {
+        if (ctx->status() != ThreadContext::Suspended)
+            return;
+    }
+
+    // All CPU threads are suspended: update the cycle count, then enter
+    // the lower power (clock gated) state.
+    updateCycleCounters(CPU_STATE_SLEEP);
+    ClockedObject::pwrState(Enums::PwrState::CLK_GATED);
+
+    // Power gating on idle is controlled by powerGatingOnIdle. When it is
+    // enabled, power gate after pwrGatingLatency cycles of clock gating.
+    if (powerGatingOnIdle)
+        schedule(enterPwrGatingEvent, clockEdge(pwrGatingLatency));
+}
+
+/**
+ * Notification that a thread context has been halted; the cycle counters
+ * are updated as for a CPU going to sleep.
+ *
+ * @param thread_num Index of the halted thread context (unused here).
+ */
+void
+BaseCPU::haltContext(ThreadID thread_num)
+{
+    updateCycleCounters(CPU_STATE_SLEEP);
+}
+
+/**
+ * Callback of enterPwrGatingEvent: switch the CPU power state to OFF.
+ */
+void
+BaseCPU::enterPwrGating()
+{
+    ClockedObject::pwrState(Enums::PwrState::OFF);
+}
+
 void
 BaseCPU::switchOut()
 {
+    assert(!_switchedOut);
+    _switchedOut = true;
     if (profileEvent && profileEvent->scheduled())
         deschedule(profileEvent);
+
+    // Flush all TLBs in the CPU to avoid having stale translations if
+    // it gets switched in later.
+    flushTLBs();
+
+    // Go to the power gating state
+    ClockedObject::pwrState(Enums::PwrState::OFF);
 }
 
 void
@@ -386,6 +556,17 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
 {
     assert(threadContexts.size() == oldCPU->threadContexts.size());
     assert(_cpuId == oldCPU->cpuId());
+    assert(_switchedOut);
+    assert(oldCPU != this);
+    _pid = oldCPU->getPid();
+    _taskId = oldCPU->taskId();
+    // Take over the power state of the switchedOut CPU
+    ClockedObject::pwrState(oldCPU->pwrState());
+
+    previousState = oldCPU->previousState;
+    previousCycle = oldCPU->previousCycle;
+
+    _switchedOut = false;
 
     ThreadID size = threadContexts.size();
     for (ThreadID i = 0; i < size; ++i) {
@@ -407,58 +588,49 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
             ThreadContext::compare(oldTC, newTC);
         */
 
-        MasterPort *old_itb_port = oldTC->getITBPtr()->getMasterPort();
-        MasterPort *old_dtb_port = oldTC->getDTBPtr()->getMasterPort();
-        MasterPort *new_itb_port = newTC->getITBPtr()->getMasterPort();
-        MasterPort *new_dtb_port = newTC->getDTBPtr()->getMasterPort();
+        Port *old_itb_port = oldTC->getITBPtr()->getTableWalkerPort();
+        Port *old_dtb_port = oldTC->getDTBPtr()->getTableWalkerPort();
+        Port *new_itb_port = newTC->getITBPtr()->getTableWalkerPort();
+        Port *new_dtb_port = newTC->getDTBPtr()->getTableWalkerPort();
 
         // Move over any table walker ports if they exist
-        if (new_itb_port && !new_itb_port->isConnected()) {
-            assert(old_itb_port);
-            SlavePort &slavePort = old_itb_port->getSlavePort();
-            new_itb_port->bind(slavePort);
-            old_itb_port->unBind();
-        }
-        if (new_dtb_port && !new_dtb_port->isConnected()) {
-            assert(old_dtb_port);
-            SlavePort &slavePort = old_dtb_port->getSlavePort();
-            new_dtb_port->bind(slavePort);
-            old_dtb_port->unBind();
-        }
+        if (new_itb_port)
+            new_itb_port->takeOverFrom(old_itb_port);
+        if (new_dtb_port)
+            new_dtb_port->takeOverFrom(old_dtb_port);
+        newTC->getITBPtr()->takeOverFrom(oldTC->getITBPtr());
+        newTC->getDTBPtr()->takeOverFrom(oldTC->getDTBPtr());
 
         // Checker whether or not we have to transfer CheckerCPU
         // objects over in the switch
         CheckerCPU *oldChecker = oldTC->getCheckerCpuPtr();
         CheckerCPU *newChecker = newTC->getCheckerCpuPtr();
         if (oldChecker && newChecker) {
-            MasterPort *old_checker_itb_port =
-                oldChecker->getITBPtr()->getMasterPort();
-            MasterPort *old_checker_dtb_port =
-                oldChecker->getDTBPtr()->getMasterPort();
-            MasterPort *new_checker_itb_port =
-                newChecker->getITBPtr()->getMasterPort();
-            MasterPort *new_checker_dtb_port =
-                newChecker->getDTBPtr()->getMasterPort();
+            Port *old_checker_itb_port =
+                oldChecker->getITBPtr()->getTableWalkerPort();
+            Port *old_checker_dtb_port =
+                oldChecker->getDTBPtr()->getTableWalkerPort();
+            Port *new_checker_itb_port =
+                newChecker->getITBPtr()->getTableWalkerPort();
+            Port *new_checker_dtb_port =
+                newChecker->getDTBPtr()->getTableWalkerPort();
+
+            newChecker->getITBPtr()->takeOverFrom(oldChecker->getITBPtr());
+            newChecker->getDTBPtr()->takeOverFrom(oldChecker->getDTBPtr());
 
             // Move over any table walker ports if they exist for checker
-            if (new_checker_itb_port && !new_checker_itb_port->isConnected()) {
-                assert(old_checker_itb_port);
-                SlavePort &slavePort = old_checker_itb_port->getSlavePort();;
-                new_checker_itb_port->bind(slavePort);
-                old_checker_itb_port->unBind();
-            }
-            if (new_checker_dtb_port && !new_checker_dtb_port->isConnected()) {
-                assert(old_checker_dtb_port);
-                SlavePort &slavePort = old_checker_dtb_port->getSlavePort();;
-                new_checker_dtb_port->bind(slavePort);
-                old_checker_dtb_port->unBind();
-            }
+            if (new_checker_itb_port)
+                new_checker_itb_port->takeOverFrom(old_checker_itb_port);
+            if (new_checker_dtb_port)
+                new_checker_dtb_port->takeOverFrom(old_checker_dtb_port);
         }
     }
 
     interrupts = oldCPU->interrupts;
-    interrupts->setCPU(this);
-    oldCPU->interrupts = NULL;
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        interrupts[tid]->setCPU(this);
+    }
+    oldCPU->interrupts.clear();
 
     if (FullSystem) {
         for (ThreadID i = 0; i < size; ++i)
@@ -468,51 +640,114 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
             schedule(profileEvent, curTick());
     }
 
-    // Connect new CPU to old CPU's memory only if new CPU isn't
-    // connected to anything.  Also connect old CPU's memory to new
-    // CPU.
-    if (!getInstPort().isConnected()) {
-        getInstPort().bind(oldCPU->getInstPort().getSlavePort());
-        oldCPU->getInstPort().unBind();
-    }
+    // All CPUs have an instruction and a data port, and the new CPU's
+    // ports are dangling while the old CPU has its ports connected
+    // already. Unbind the old CPU and then bind the ports of the one
+    // we are switching to.
+    getInstPort().takeOverFrom(&oldCPU->getInstPort());
+    getDataPort().takeOverFrom(&oldCPU->getDataPort());
+}
 
-    if (!getDataPort().isConnected()) {
-        getDataPort().bind(oldCPU->getDataPort().getSlavePort());
-        oldCPU->getDataPort().unBind();
+/**
+ * Flush the instruction and data TLBs of every thread context of this
+ * CPU, and those of the attached checker CPU where one exists.
+ */
+void
+BaseCPU::flushTLBs()
+{
+    for (ThreadContext *tc : threadContexts) {
+        CheckerCPU *checker = tc->getCheckerCpuPtr();
+
+        tc->getITBPtr()->flushAll();
+        tc->getDTBPtr()->flushAll();
+
+        // A checker CPU, when present, has its own pair of TLBs.
+        if (checker) {
+            checker->getITBPtr()->flushAll();
+            checker->getDTBPtr()->flushAll();
+        }
     }
 }
 
-
-BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, Tick _interval)
-    : cpu(_cpu), interval(_interval)
-{ }
-
 void
-BaseCPU::ProfileEvent::process()
+BaseCPU::processProfileEvent()
 {
-    ThreadID size = cpu->threadContexts.size();
-    for (ThreadID i = 0; i < size; ++i) {
-        ThreadContext *tc = cpu->threadContexts[i];
-        tc->profileSample();
-    }
+    ThreadID size = threadContexts.size();
 
-    cpu->schedule(this, curTick() + interval);
+    for (ThreadID i = 0; i < size; ++i)
+        threadContexts[i]->profileSample();
+
+    schedule(profileEvent, curTick() + params()->profile);
 }
 
 void
-BaseCPU::serialize(std::ostream &os)
+BaseCPU::serialize(CheckpointOut &cp) const
 {
     SERIALIZE_SCALAR(instCnt);
-    interrupts->serialize(os);
+
+    if (!_switchedOut) {
+        /* Unlike _pid, _taskId is not serialized, as they are dynamically
+         * assigned unique ids that are only meaningful for the duration of
+         * a specific run. We will need to serialize the entire taskMap in
+         * system. */
+        SERIALIZE_SCALAR(_pid);
+
+        // Serialize the threads, this is done by the CPU implementation.
+        for (ThreadID i = 0; i < numThreads; ++i) {
+            ScopedCheckpointSection sec(cp, csprintf("xc.%i", i));
+            interrupts[i]->serialize(cp);
+            serializeThread(cp, i);
+        }
+    }
 }
 
 void
-BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
+BaseCPU::unserialize(CheckpointIn &cp)
 {
     UNSERIALIZE_SCALAR(instCnt);
-    interrupts->unserialize(cp, section);
+
+    if (!_switchedOut) {
+        UNSERIALIZE_SCALAR(_pid);
+
+        // Unserialize the threads, this is done by the CPU implementation.
+        for (ThreadID i = 0; i < numThreads; ++i) {
+            ScopedCheckpointSection sec(cp, csprintf("xc.%i", i));
+            interrupts[i]->unserialize(cp);
+            unserializeThread(cp, i);
+        }
+    }
+}
+
+/**
+ * Schedule a simulation loop exit after a thread has executed a given
+ * number of additional instructions.
+ *
+ * @param tid   Thread to schedule the exit event on.
+ * @param insts Number of instructions, counted from the thread's current
+ *              instruction count.
+ * @param cause Cause string attached to the exit event.
+ */
+void
+BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
+{
+    // The stop point is relative to what the thread has executed so far.
+    const Tick base_count = getCurrentInstCount(tid);
+    Event *exit_event = new LocalSimLoopExitEvent(cause, 0);
+
+    threadContexts[tid]->scheduleInstCountEvent(exit_event,
+                                                base_count + insts);
+}
+
+/**
+ * Look up the current instruction count of a thread.
+ *
+ * @param tid Thread to query.
+ * @return The value reported by the thread context.
+ */
+Tick
+BaseCPU::getCurrentInstCount(ThreadID tid)
+{
+    ThreadContext *tc = threadContexts[tid];
+    return tc->getCurrentInstCount();
+}
+
+/**
+ * Construct a monitor that is neither armed nor waiting and has no
+ * pending wakeup.
+ */
+AddressMonitor::AddressMonitor() {
+    armed = false;
+    waiting = false;
+    gotWakeup = false;
+    // Give the address fields a defined value as well: BaseCPU::mwait()
+    // prints vAddr even if the monitor was never armed, and reading an
+    // indeterminate value there would be undefined behaviour.
+    vAddr = 0;
+    pAddr = 0;
 }
 
+/**
+ * Check a packet against the monitored physical address.
+ *
+ * @param pkt Packet to check; its request must carry a physical address.
+ * @return true if the monitor was armed and waiting and the packet hits
+ *         the monitored address, in which case waiting is cleared;
+ *         false otherwise.
+ */
+bool
+AddressMonitor::doMonitor(PacketPtr pkt)
+{
+    assert(pkt->req->hasPaddr());
+
+    // Only an armed monitor that is actually waiting can be triggered.
+    if (!armed || !waiting)
+        return false;
+
+    if (pAddr != pkt->getAddr())
+        return false;
+
+    DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
+            pkt->getAddr());
+    waiting = false;
+    return true;
+}
+
+
 void
 BaseCPU::traceFunctionsInternal(Addr pc)
 {
@@ -541,22 +776,7 @@ BaseCPU::traceFunctionsInternal(Addr pc)
 }
 
 bool
-BaseCPU::CpuPort::recvTimingResp(PacketPtr pkt)
-{
-    panic("BaseCPU doesn't expect recvTiming!\n");
-    return true;
-}
-
-void
-BaseCPU::CpuPort::recvRetry()
-{
-    panic("BaseCPU doesn't expect recvRetry!\n");
-}
-
-void
-BaseCPU::CpuPort::recvFunctionalSnoop(PacketPtr pkt)
+BaseCPU::waitForRemoteGDB() const
 {
-    // No internal storage to update (in the general case). A CPU with
-    // internal storage, e.g. an LSQ that should be part of the
-    // coherent memory has to check against stored data.
+    return params()->wait_for_remote_gdb;
 }