cpu: o3: replace issueLatency with bool pipelined
[gem5.git] / src / cpu / base.cc
index 16760613598830557ead10db0f22ae6078f17ff7..4d8b09ed2572a57e58217a8f4fda86dfe12fe729 100644 (file)
@@ -1,5 +1,20 @@
 /*
+ * Copyright (c) 2011-2012 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * Copyright (c) 2011 Regents of the University of California
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * Copyright (c) 2013 Mark D. Hill and David A. Wood
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  *
  * Authors: Steve Reinhardt
  *          Nathan Binkert
+ *          Rick Strong
  */
 
 #include <iostream>
-#include <string>
 #include <sstream>
+#include <string>
 
-#include "base/cprintf.hh"
+#include "arch/tlb.hh"
 #include "base/loader/symtab.hh"
+#include "base/cprintf.hh"
 #include "base/misc.hh"
 #include "base/output.hh"
 #include "base/trace.hh"
+#include "cpu/checker/cpu.hh"
 #include "cpu/base.hh"
 #include "cpu/cpuevent.hh"
-#include "cpu/thread_context.hh"
 #include "cpu/profile.hh"
+#include "cpu/thread_context.hh"
+#include "debug/Mwait.hh"
+#include "debug/SyscallVerbose.hh"
+#include "mem/page_table.hh"
 #include "params/BaseCPU.hh"
-#include "sim/sim_exit.hh"
+#include "sim/full_system.hh"
 #include "sim/process.hh"
 #include "sim/sim_events.hh"
+#include "sim/sim_exit.hh"
 #include "sim/system.hh"
 
 // Hack
@@ -61,29 +83,38 @@ vector<BaseCPU *> BaseCPU::cpuList;
 int maxThreadsPerCPU = 1;
 
 CPUProgressEvent::CPUProgressEvent(BaseCPU *_cpu, Tick ival)
-    : Event(Event::Progress_Event_Pri), interval(ival), lastNumInst(0),
-      cpu(_cpu)
+    : Event(Event::Progress_Event_Pri), _interval(ival), lastNumInst(0),
+      cpu(_cpu), _repeatEvent(true)
 {
-    if (interval)
-        cpu->schedule(this, curTick + interval);
+    if (_interval)
+        cpu->schedule(this, curTick() + _interval);
 }
 
 void
 CPUProgressEvent::process()
 {
-    Counter temp = cpu->totalInstructions();
+    Counter temp = cpu->totalOps();
+
+    if (_repeatEvent)
+      cpu->schedule(this, curTick() + _interval);
+
+    if(cpu->switchedOut()) {
+      return;
+    }
+
 #ifndef NDEBUG
-    double ipc = double(temp - lastNumInst) / (interval / cpu->ticks(1));
+    double ipc = double(temp - lastNumInst) / (_interval / cpu->clockPeriod());
 
-    DPRINTFN("%s progress event, instructions committed: %lli, IPC: %0.8d\n",
-             cpu->name(), temp - lastNumInst, ipc);
+    DPRINTFN("%s progress event, total committed:%i, progress insts committed: "
+             "%lli, IPC: %0.8d\n", cpu->name(), temp, temp - lastNumInst,
+             ipc);
     ipc = 0.0;
 #else
-    cprintf("%lli: %s progress event, instructions committed: %lli\n",
-            curTick, cpu->name(), temp - lastNumInst);
+    cprintf("%lli: %s progress event, total committed:%i, progress insts "
+            "committed: %lli\n", curTick(), cpu->name(), temp,
+            temp - lastNumInst);
 #endif
     lastNumInst = temp;
-    cpu->schedule(this, curTick + interval);
 }
 
 const char *
@@ -92,21 +123,18 @@ CPUProgressEvent::description() const
     return "CPU Progress";
 }
 
-#if FULL_SYSTEM
-BaseCPU::BaseCPU(Params *p)
-    : MemObject(p), clock(p->clock), instCnt(0), _cpuId(p->cpu_id),
-      interrupts(p->interrupts),
-      number_of_threads(p->numThreads), system(p->system),
-      phase(p->phase)
-#else
-BaseCPU::BaseCPU(Params *p)
-    : MemObject(p), clock(p->clock), _cpuId(p->cpu_id),
-      number_of_threads(p->numThreads), system(p->system),
-      phase(p->phase)
-#endif
+BaseCPU::BaseCPU(Params *p, bool is_checker)
+    : MemObject(p), instCnt(0), _cpuId(p->cpu_id), _socketId(p->socket_id),
+      _instMasterId(p->system->getMasterId(name() + ".inst")),
+      _dataMasterId(p->system->getMasterId(name() + ".data")),
+      _taskId(ContextSwitchTaskId::Unknown), _pid(Request::invldPid),
+      _switchedOut(p->switched_out), _cacheLineSize(p->system->cacheLineSize()),
+      interrupts(p->interrupts), profileEvent(NULL),
+      numThreads(p->numThreads), system(p->system),
+      functionTraceStream(nullptr), currentFunctionStart(0),
+      currentFunctionEnd(0), functionEntryTick(0),
+      addressMonitor()
 {
-//    currentTick = curTick;
-
     // if Python did not provide a valid ID, do it here
     if (_cpuId == -1 ) {
         _cpuId = cpuList.size();
@@ -115,25 +143,35 @@ BaseCPU::BaseCPU(Params *p)
     // add self to global list of CPUs
     cpuList.push_back(this);
 
-    DPRINTF(SyscallVerbose, "Constructing CPU with id %d\n", _cpuId);
+    DPRINTF(SyscallVerbose, "Constructing CPU with id %d, socket id %d\n",
+                _cpuId, _socketId);
 
-    if (number_of_threads > maxThreadsPerCPU)
-        maxThreadsPerCPU = number_of_threads;
+    if (numThreads > maxThreadsPerCPU)
+        maxThreadsPerCPU = numThreads;
 
     // allocate per-thread instruction-based event queues
-    comInstEventQueue = new EventQueue *[number_of_threads];
-    for (int i = 0; i < number_of_threads; ++i)
-        comInstEventQueue[i] = new EventQueue("instruction-based event queue");
+    comInstEventQueue = new EventQueue *[numThreads];
+    for (ThreadID tid = 0; tid < numThreads; ++tid)
+        comInstEventQueue[tid] =
+            new EventQueue("instruction-based event queue");
 
     //
     // set up instruction-count-based termination events, if any
     //
     if (p->max_insts_any_thread != 0) {
         const char *cause = "a thread reached the max instruction count";
-        for (int i = 0; i < number_of_threads; ++i) {
-            Event *event = new SimLoopExitEvent(cause, 0);
-            comInstEventQueue[i]->schedule(event, p->max_insts_any_thread);
-        }
+        for (ThreadID tid = 0; tid < numThreads; ++tid)
+            scheduleInstStop(tid, p->max_insts_any_thread, cause);
+    }
+
+    // Set up instruction-count-based termination events for SimPoints
+    // Typically, there is more than one action point.
+    // Simulation.py is responsible for taking the necessary actions upon
+    // exiting the simulation loop.
+    if (!p->simpoint_start_insts.empty()) {
+        const char *cause = "simpoint starting point found";
+        for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i)
+            scheduleInstStop(0, p->simpoint_start_insts[i], cause);
     }
 
     if (p->max_insts_all_threads != 0) {
@@ -143,27 +181,25 @@ BaseCPU::BaseCPU(Params *p)
         // decrement this when triggered; simulation will terminate
         // when counter reaches 0
         int *counter = new int;
-        *counter = number_of_threads;
-        for (int i = 0; i < number_of_threads; ++i) {
+        *counter = numThreads;
+        for (ThreadID tid = 0; tid < numThreads; ++tid) {
             Event *event = new CountedExitEvent(cause, *counter);
-            comInstEventQueue[i]->schedule(event, p->max_insts_any_thread);
+            comInstEventQueue[tid]->schedule(event, p->max_insts_all_threads);
         }
     }
 
     // allocate per-thread load-based event queues
-    comLoadEventQueue = new EventQueue *[number_of_threads];
-    for (int i = 0; i < number_of_threads; ++i)
-        comLoadEventQueue[i] = new EventQueue("load-based event queue");
+    comLoadEventQueue = new EventQueue *[numThreads];
+    for (ThreadID tid = 0; tid < numThreads; ++tid)
+        comLoadEventQueue[tid] = new EventQueue("load-based event queue");
 
     //
     // set up instruction-count-based termination events, if any
     //
     if (p->max_loads_any_thread != 0) {
         const char *cause = "a thread reached the max load count";
-        for (int i = 0; i < number_of_threads; ++i) {
-            Event *event = new SimLoopExitEvent(cause, 0);
-            comLoadEventQueue[i]->schedule(event, p->max_loads_any_thread);
-        }
+        for (ThreadID tid = 0; tid < numThreads; ++tid)
+            scheduleLoadStop(tid, p->max_loads_any_thread, cause);
     }
 
     if (p->max_loads_all_threads != 0) {
@@ -172,16 +208,20 @@ BaseCPU::BaseCPU(Params *p)
         // decrement this when triggered; simulation will terminate
         // when counter reaches 0
         int *counter = new int;
-        *counter = number_of_threads;
-        for (int i = 0; i < number_of_threads; ++i) {
+        *counter = numThreads;
+        for (ThreadID tid = 0; tid < numThreads; ++tid) {
             Event *event = new CountedExitEvent(cause, *counter);
-            comLoadEventQueue[i]->schedule(event, p->max_loads_all_threads);
+            comLoadEventQueue[tid]->schedule(event, p->max_loads_all_threads);
         }
     }
 
     functionTracingEnabled = false;
     if (p->function_trace) {
-        functionTraceStream = simout.find(csprintf("ftrace.%s", name()));
+        const string fname = csprintf("ftrace.%s", name());
+        functionTraceStream = simout.find(fname);
+        if (!functionTraceStream)
+            functionTraceStream = simout.create(fname);
+
         currentFunctionStart = currentFunctionEnd = 0;
         functionEntryTick = p->function_trace_start;
 
@@ -193,12 +233,28 @@ BaseCPU::BaseCPU(Params *p)
             schedule(event, p->function_trace_start);
         }
     }
-#if FULL_SYSTEM
-    profileEvent = NULL;
-    if (params()->profile)
-        profileEvent = new ProfileEvent(this, params()->profile);
-#endif
+
+    // The interrupts should always be present unless this CPU is
+    // switched in later or in case it is a checker CPU
+    if (!params()->switched_out && !is_checker) {
+        if (interrupts) {
+            interrupts->setCPU(this);
+        } else {
+            fatal("CPU %s has no interrupt controller.\n"
+                  "Ensure createInterruptController() is called.\n", name());
+        }
+    }
+
+    if (FullSystem) {
+        if (params()->profile)
+            profileEvent = new ProfileEvent(this, params()->profile);
+    }
     tracer = params()->tracer;
+
+    if (params()->isa.size() != numThreads) {
+        fatal("Number of ISAs (%i) assigned to the CPU does not equal number "
+              "of threads (%i).\n", params()->isa.size(), numThreads);
+    }
 }
 
 void
@@ -209,30 +265,127 @@ BaseCPU::enableFunctionTrace()
 
 BaseCPU::~BaseCPU()
 {
+    delete profileEvent;
+    delete[] comLoadEventQueue;
+    delete[] comInstEventQueue;
+}
+
+void
+BaseCPU::armMonitor(Addr address)
+{
+    addressMonitor.armed = true;
+    addressMonitor.vAddr = address;
+    addressMonitor.pAddr = 0x0;
+    DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+}
+
+bool
+BaseCPU::mwait(PacketPtr pkt)
+{
+    if(addressMonitor.gotWakeup == false) {
+        int block_size = cacheLineSize();
+        uint64_t mask = ~((uint64_t)(block_size - 1));
+
+        assert(pkt->req->hasPaddr());
+        addressMonitor.pAddr = pkt->getAddr() & mask;
+        addressMonitor.waiting = true;
+
+        DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+                addressMonitor.vAddr, addressMonitor.pAddr);
+        return true;
+    } else {
+        addressMonitor.gotWakeup = false;
+        return false;
+    }
+}
+
+void
+BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+{
+    Request req;
+    Addr addr = addressMonitor.vAddr;
+    int block_size = cacheLineSize();
+    uint64_t mask = ~((uint64_t)(block_size - 1));
+    int size = block_size;
+
+    // The address of the next line if it crosses a cache line boundary.
+    Addr secondAddr = roundDown(addr + size - 1, block_size);
+
+    if (secondAddr > addr)
+        size = secondAddr - addr;
+
+    req.setVirt(0, addr, size, 0x0, dataMasterId(), tc->instAddr());
+
+    // translate to physical address
+    Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
+    assert(fault == NoFault);
+
+    addressMonitor.pAddr = req.getPaddr() & mask;
+    addressMonitor.waiting = true;
+
+    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            addressMonitor.vAddr, addressMonitor.pAddr);
 }
 
 void
 BaseCPU::init()
 {
-    if (!params()->defer_registration)
+    if (!params()->switched_out) {
         registerThreadContexts();
+
+        verifyMemoryMode();
+    }
 }
 
 void
 BaseCPU::startup()
 {
-#if FULL_SYSTEM
-    if (!params()->defer_registration && profileEvent)
-        schedule(profileEvent, curTick);
-#endif
+    if (FullSystem) {
+        if (!params()->switched_out && profileEvent)
+            schedule(profileEvent, curTick());
+    }
 
     if (params()->progress_interval) {
-        Tick num_ticks = ticks(params()->progress_interval);
-        Event *event = new CPUProgressEvent(this, num_ticks);
-        schedule(event, curTick + num_ticks);
+        new CPUProgressEvent(this, params()->progress_interval);
     }
 }
 
+ProbePoints::PMUUPtr
+BaseCPU::pmuProbePoint(const char *name)
+{
+    ProbePoints::PMUUPtr ptr;
+    ptr.reset(new ProbePoints::PMU(getProbeManager(), name));
+
+    return ptr;
+}
+
+void
+BaseCPU::regProbePoints()
+{
+    ppCycles = pmuProbePoint("Cycles");
+
+    ppRetiredInsts = pmuProbePoint("RetiredInsts");
+    ppRetiredLoads = pmuProbePoint("RetiredLoads");
+    ppRetiredStores = pmuProbePoint("RetiredStores");
+    ppRetiredBranches = pmuProbePoint("RetiredBranches");
+}
+
+void
+BaseCPU::probeInstCommit(const StaticInstPtr &inst)
+{
+    if (!inst->isMicroop() || inst->isLastMicroop())
+        ppRetiredInsts->notify(1);
+
+
+    if (inst->isLoad())
+        ppRetiredLoads->notify(1);
+
+    if (inst->isStore())
+        ppRetiredStores->notify(1);
+
+    if (inst->isControl())
+        ppRetiredBranches->notify(1);
+}
 
 void
 BaseCPU::regStats()
@@ -244,6 +397,16 @@ BaseCPU::regStats()
         .desc("number of cpu cycles simulated")
         ;
 
+    numWorkItemsStarted
+        .name(name() + ".numWorkItemsStarted")
+        .desc("number of work items this cpu started")
+        ;
+
+    numWorkItemsCompleted
+        .name(name() + ".numWorkItemsCompleted")
+        .desc("number of work items this cpu completed")
+        ;
+
     int size = threadContexts.size();
     if (size > 1) {
         for (int i = 0; i < size; ++i) {
@@ -253,37 +416,29 @@ BaseCPU::regStats()
         }
     } else if (size == 1)
         threadContexts[0]->regStats(name());
-
-#if FULL_SYSTEM
-#endif
 }
 
-Tick
-BaseCPU::nextCycle()
+BaseMasterPort &
+BaseCPU::getMasterPort(const string &if_name, PortID idx)
 {
-    Tick next_tick = curTick - phase + clock - 1;
-    next_tick -= (next_tick % clock);
-    next_tick += phase;
-    return next_tick;
-}
-
-Tick
-BaseCPU::nextCycle(Tick begin_tick)
-{
-    Tick next_tick = begin_tick;
-    if (next_tick % clock != 0)
-        next_tick = next_tick - (next_tick % clock) + clock;
-    next_tick += phase;
-
-    assert(next_tick >= curTick);
-    return next_tick;
+    // Get the right port based on name. This applies to all the
+    // subclasses of the base CPU and relies on their implementation
+    // of getDataPort and getInstPort. In all cases these methods
+    // return a MasterPort pointer.
+    if (if_name == "dcache_port")
+        return getDataPort();
+    else if (if_name == "icache_port")
+        return getInstPort();
+    else
+        return MemObject::getMasterPort(if_name, idx);
 }
 
 void
 BaseCPU::registerThreadContexts()
 {
-    for (int i = 0; i < threadContexts.size(); ++i) {
-        ThreadContext *tc = threadContexts[i];
+    ThreadID size = threadContexts.size();
+    for (ThreadID tid = 0; tid < size; ++tid) {
+        ThreadContext *tc = threadContexts[tid];
 
         /** This is so that contextId and cpuId match where there is a
          * 1cpu:1context relationship.  Otherwise, the order of registration
@@ -292,13 +447,13 @@ BaseCPU::registerThreadContexts()
          * cpu 0 has the lowest thread contexts and cpu N has the highest, but
          * I'll just do this for now
          */
-        if (number_of_threads == 1)
+        if (numThreads == 1)
             tc->setContextId(system->registerThreadContext(tc, _cpuId));
         else
             tc->setContextId(system->registerThreadContext(tc));
-#if !FULL_SYSTEM
-        tc->getProcessPtr()->assignThreadContext(tc->contextId());
-#endif
+
+        if (!FullSystem)
+            tc->getProcessPtr()->assignThreadContext(tc->contextId());
     }
 }
 
@@ -306,9 +461,10 @@ BaseCPU::registerThreadContexts()
 int
 BaseCPU::findContext(ThreadContext *tc)
 {
-    for (int i = 0; i < threadContexts.size(); ++i) {
-        if (tc == threadContexts[i])
-            return i;
+    ThreadID size = threadContexts.size();
+    for (ThreadID tid = 0; tid < size; ++tid) {
+        if (tc == threadContexts[tid])
+            return tid;
     }
     return 0;
 }
@@ -316,21 +472,29 @@ BaseCPU::findContext(ThreadContext *tc)
 void
 BaseCPU::switchOut()
 {
-//    panic("This CPU doesn't support sampling!");
-#if FULL_SYSTEM
+    assert(!_switchedOut);
+    _switchedOut = true;
     if (profileEvent && profileEvent->scheduled())
         deschedule(profileEvent);
-#endif
+
+    // Flush all TLBs in the CPU to avoid having stale translations if
+    // it gets switched in later.
+    flushTLBs();
 }
 
 void
-BaseCPU::takeOverFrom(BaseCPU *oldCPU, Port *ic, Port *dc)
+BaseCPU::takeOverFrom(BaseCPU *oldCPU)
 {
     assert(threadContexts.size() == oldCPU->threadContexts.size());
-
-    _cpuId = oldCPU->cpuId();
-
-    for (int i = 0; i < threadContexts.size(); ++i) {
+    assert(_cpuId == oldCPU->cpuId());
+    assert(_switchedOut);
+    assert(oldCPU != this);
+    _pid = oldCPU->getPid();
+    _taskId = oldCPU->taskId();
+    _switchedOut = false;
+
+    ThreadID size = threadContexts.size();
+    for (ThreadID i = 0; i < size; ++i) {
         ThreadContext *newTC = threadContexts[i];
         ThreadContext *oldTC = oldCPU->threadContexts[i];
 
@@ -342,38 +506,123 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU, Port *ic, Port *dc)
         assert(newTC->threadId() == oldTC->threadId());
         system->replaceThreadContext(newTC, newTC->contextId());
 
-        if (DTRACE(Context))
+        /* This code no longer works since the zero register (e.g.,
+         * r31 on Alpha) doesn't necessarily contain zero at this
+         * point.
+           if (DTRACE(Context))
             ThreadContext::compare(oldTC, newTC);
+        */
+
+        BaseMasterPort *old_itb_port = oldTC->getITBPtr()->getMasterPort();
+        BaseMasterPort *old_dtb_port = oldTC->getDTBPtr()->getMasterPort();
+        BaseMasterPort *new_itb_port = newTC->getITBPtr()->getMasterPort();
+        BaseMasterPort *new_dtb_port = newTC->getDTBPtr()->getMasterPort();
+
+        // Move over any table walker ports if they exist
+        if (new_itb_port) {
+            assert(!new_itb_port->isConnected());
+            assert(old_itb_port);
+            assert(old_itb_port->isConnected());
+            BaseSlavePort &slavePort = old_itb_port->getSlavePort();
+            old_itb_port->unbind();
+            new_itb_port->bind(slavePort);
+        }
+        if (new_dtb_port) {
+            assert(!new_dtb_port->isConnected());
+            assert(old_dtb_port);
+            assert(old_dtb_port->isConnected());
+            BaseSlavePort &slavePort = old_dtb_port->getSlavePort();
+            old_dtb_port->unbind();
+            new_dtb_port->bind(slavePort);
+        }
+        newTC->getITBPtr()->takeOverFrom(oldTC->getITBPtr());
+        newTC->getDTBPtr()->takeOverFrom(oldTC->getDTBPtr());
+
+        // Check whether or not we have to transfer CheckerCPU
+        // objects over in the switch
+        CheckerCPU *oldChecker = oldTC->getCheckerCpuPtr();
+        CheckerCPU *newChecker = newTC->getCheckerCpuPtr();
+        if (oldChecker && newChecker) {
+            BaseMasterPort *old_checker_itb_port =
+                oldChecker->getITBPtr()->getMasterPort();
+            BaseMasterPort *old_checker_dtb_port =
+                oldChecker->getDTBPtr()->getMasterPort();
+            BaseMasterPort *new_checker_itb_port =
+                newChecker->getITBPtr()->getMasterPort();
+            BaseMasterPort *new_checker_dtb_port =
+                newChecker->getDTBPtr()->getMasterPort();
+
+            newChecker->getITBPtr()->takeOverFrom(oldChecker->getITBPtr());
+            newChecker->getDTBPtr()->takeOverFrom(oldChecker->getDTBPtr());
+
+            // Move over any table walker ports if they exist for checker
+            if (new_checker_itb_port) {
+                assert(!new_checker_itb_port->isConnected());
+                assert(old_checker_itb_port);
+                assert(old_checker_itb_port->isConnected());
+                BaseSlavePort &slavePort =
+                    old_checker_itb_port->getSlavePort();
+                old_checker_itb_port->unbind();
+                new_checker_itb_port->bind(slavePort);
+            }
+            if (new_checker_dtb_port) {
+                assert(!new_checker_dtb_port->isConnected());
+                assert(old_checker_dtb_port);
+                assert(old_checker_dtb_port->isConnected());
+                BaseSlavePort &slavePort =
+                    old_checker_dtb_port->getSlavePort();
+                old_checker_dtb_port->unbind();
+                new_checker_dtb_port->bind(slavePort);
+            }
+        }
     }
 
-#if FULL_SYSTEM
     interrupts = oldCPU->interrupts;
+    interrupts->setCPU(this);
+    oldCPU->interrupts = NULL;
 
-    for (int i = 0; i < threadContexts.size(); ++i)
-        threadContexts[i]->profileClear();
-
-    if (profileEvent)
-        schedule(profileEvent, curTick);
-#endif
+    if (FullSystem) {
+        for (ThreadID i = 0; i < size; ++i)
+            threadContexts[i]->profileClear();
 
-    // Connect new CPU to old CPU's memory only if new CPU isn't
-    // connected to anything.  Also connect old CPU's memory to new
-    // CPU.
-    if (!ic->isConnected()) {
-        Port *peer = oldCPU->getPort("icache_port")->getPeer();
-        ic->setPeer(peer);
-        peer->setPeer(ic);
+        if (profileEvent)
+            schedule(profileEvent, curTick());
     }
 
-    if (!dc->isConnected()) {
-        Port *peer = oldCPU->getPort("dcache_port")->getPeer();
-        dc->setPeer(peer);
-        peer->setPeer(dc);
+    // All CPUs have an instruction and a data port, and the new CPU's
+    // ports are dangling while the old CPU has its ports connected
+    // already. Unbind the old CPU and then bind the ports of the one
+    // we are switching to.
+    assert(!getInstPort().isConnected());
+    assert(oldCPU->getInstPort().isConnected());
+    BaseSlavePort &inst_peer_port = oldCPU->getInstPort().getSlavePort();
+    oldCPU->getInstPort().unbind();
+    getInstPort().bind(inst_peer_port);
+
+    assert(!getDataPort().isConnected());
+    assert(oldCPU->getDataPort().isConnected());
+    BaseSlavePort &data_peer_port = oldCPU->getDataPort().getSlavePort();
+    oldCPU->getDataPort().unbind();
+    getDataPort().bind(data_peer_port);
+}
+
+void
+BaseCPU::flushTLBs()
+{
+    for (ThreadID i = 0; i < threadContexts.size(); ++i) {
+        ThreadContext &tc(*threadContexts[i]);
+        CheckerCPU *checker(tc.getCheckerCpuPtr());
+
+        tc.getITBPtr()->flushAll();
+        tc.getDTBPtr()->flushAll();
+        if (checker) {
+            checker->getITBPtr()->flushAll();
+            checker->getDTBPtr()->flushAll();
+        }
     }
 }
 
 
-#if FULL_SYSTEM
 BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, Tick _interval)
     : cpu(_cpu), interval(_interval)
 { }
@@ -381,47 +630,89 @@ BaseCPU::ProfileEvent::ProfileEvent(BaseCPU *_cpu, Tick _interval)
 void
 BaseCPU::ProfileEvent::process()
 {
-    for (int i = 0, size = cpu->threadContexts.size(); i < size; ++i) {
+    ThreadID size = cpu->threadContexts.size();
+    for (ThreadID i = 0; i < size; ++i) {
         ThreadContext *tc = cpu->threadContexts[i];
         tc->profileSample();
     }
 
-    cpu->schedule(this, curTick + interval);
+    cpu->schedule(this, curTick() + interval);
 }
 
 void
-BaseCPU::postInterrupt(int int_num, int index)
+BaseCPU::serialize(std::ostream &os)
 {
-    interrupts->post(int_num, index);
+    SERIALIZE_SCALAR(instCnt);
+
+    if (!_switchedOut) {
+        /* Unlike _pid, _taskId is not serialized, as task ids are
+         * dynamically assigned unique ids that are only meaningful for
+         * the duration of a specific run. We will need to serialize the
+         * entire taskMap in system. */
+        SERIALIZE_SCALAR(_pid);
+
+        interrupts->serialize(os);
+
+        // Serialize the threads, this is done by the CPU implementation.
+        for (ThreadID i = 0; i < numThreads; ++i) {
+            nameOut(os, csprintf("%s.xc.%i", name(), i));
+            serializeThread(os, i);
+        }
+    }
 }
 
 void
-BaseCPU::clearInterrupt(int int_num, int index)
+BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
 {
-    interrupts->clear(int_num, index);
+    UNSERIALIZE_SCALAR(instCnt);
+
+    if (!_switchedOut) {
+        UNSERIALIZE_SCALAR(_pid);
+        interrupts->unserialize(cp, section);
+
+        // Unserialize the threads, this is done by the CPU implementation.
+        for (ThreadID i = 0; i < numThreads; ++i)
+            unserializeThread(cp, csprintf("%s.xc.%i", section, i), i);
+    }
 }
 
 void
-BaseCPU::clearInterrupts()
+BaseCPU::scheduleInstStop(ThreadID tid, Counter insts, const char *cause)
 {
-    interrupts->clearAll();
+    const Tick now(comInstEventQueue[tid]->getCurTick());
+    Event *event(new LocalSimLoopExitEvent(cause, 0));
+
+    comInstEventQueue[tid]->schedule(event, now + insts);
 }
 
-void
-BaseCPU::serialize(std::ostream &os)
-{
-    SERIALIZE_SCALAR(instCnt);
-    interrupts->serialize(os);
+AddressMonitor::AddressMonitor() {
+    armed = false;
+    waiting = false;
+    gotWakeup = false;
+}
+
+bool AddressMonitor::doMonitor(PacketPtr pkt) {
+    assert(pkt->req->hasPaddr());
+    if(armed && waiting) {
+        if(pAddr == pkt->getAddr()) {
+            DPRINTF(Mwait,"pAddr=0x%lx invalidated: waking up core\n",
+                    pkt->getAddr());
+            waiting = false;
+            return true;
+        }
+    }
+    return false;
 }
 
 void
-BaseCPU::unserialize(Checkpoint *cp, const std::string &section)
+BaseCPU::scheduleLoadStop(ThreadID tid, Counter loads, const char *cause)
 {
-    UNSERIALIZE_SCALAR(instCnt);
-    interrupts->unserialize(cp, section);
+    const Tick now(comLoadEventQueue[tid]->getCurTick());
+    Event *event(new LocalSimLoopExitEvent(cause, 0));
+
+    comLoadEventQueue[tid]->schedule(event, now + loads);
 }
 
-#endif // FULL_SYSTEM
 
 void
 BaseCPU::traceFunctionsInternal(Addr pc)
@@ -445,7 +736,7 @@ BaseCPU::traceFunctionsInternal(Addr pc)
         }
 
         ccprintf(*functionTraceStream, " (%d)\n%d: %s",
-                 curTick - functionEntryTick, curTick, sym_str);
-        functionEntryTick = curTick;
+                 curTick() - functionEntryTick, curTick(), sym_str);
+        functionEntryTick = curTick();
     }
 }