Merge ktlim@zizzer:/bk/m5
[gem5.git] / src / cpu / o3 / cpu.cc
index a268dbc23417be9c53bcfb2943ba6bd12ef95522..ed02a845b44925fe23d7d9d84446b80e6fa06f30 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #else
 #include "sim/process.hh"
 #endif
-#include "sim/root.hh"
 
+#include "cpu/activity.hh"
+#include "cpu/checker/cpu.hh"
 #include "cpu/cpu_exec_context.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/o3/alpha_dyn_inst.hh"
 #include "cpu/o3/alpha_impl.hh"
 #include "cpu/o3/cpu.hh"
 
+#include "sim/root.hh"
+#include "sim/stat_control.hh"
+
 using namespace std;
 
-BaseFullCPU::BaseFullCPU(Params &params)
-    : BaseCPU(&params), cpu_id(0)
+BaseFullCPU::BaseFullCPU(Params *params)
+    : BaseCPU(params), cpu_id(0)
+{
+}
+
+void
+BaseFullCPU::regStats()
 {
+    BaseCPU::regStats();
 }
 
 template <class Impl>
@@ -68,97 +78,88 @@ FullO3CPU<Impl>::TickEvent::description()
     return "FullO3CPU tick event";
 }
 
-//Call constructor to all the pipeline stages here
 template <class Impl>
-FullO3CPU<Impl>::FullO3CPU(Params &params)
-#if FULL_SYSTEM
-    : BaseFullCPU(params),
-#else
+FullO3CPU<Impl>::FullO3CPU(Params *params)
     : BaseFullCPU(params),
-#endif // FULL_SYSTEM
       tickEvent(this),
+      removeInstsThisCycle(false),
       fetch(params),
       decode(params),
       rename(params),
       iew(params),
       commit(params),
 
-      regFile(params.numPhysIntRegs, params.numPhysFloatRegs),
+      regFile(params->numPhysIntRegs, params->numPhysFloatRegs),
 
-      freeList(TheISA::NumIntRegs, params.numPhysIntRegs,
-               TheISA::NumFloatRegs, params.numPhysFloatRegs),
+      freeList(params->numberOfThreads,//number of activeThreads
+               TheISA::NumIntRegs, params->numPhysIntRegs,
+               TheISA::NumFloatRegs, params->numPhysFloatRegs),
 
-      renameMap(TheISA::NumIntRegs, params.numPhysIntRegs,
-                TheISA::NumFloatRegs, params.numPhysFloatRegs,
-                TheISA::NumMiscRegs,
-                TheISA::ZeroReg,
-                TheISA::ZeroReg + TheISA::NumIntRegs),
+      rob(params->numROBEntries, params->squashWidth,
+          params->smtROBPolicy, params->smtROBThreshold,
+          params->numberOfThreads),
 
-      rob(params.numROBEntries, params.squashWidth),
+      scoreboard(params->numberOfThreads,//number of activeThreads
+                 TheISA::NumIntRegs, params->numPhysIntRegs,
+                 TheISA::NumFloatRegs, params->numPhysFloatRegs,
+                 TheISA::NumMiscRegs * number_of_threads,
+                 TheISA::ZeroReg),
 
-      // What to pass to these time buffers?
       // For now just have these time buffers be pretty big.
+      // @todo: Make these time buffer sizes parameters or derived
+      // from latencies
       timeBuffer(5, 5),
       fetchQueue(5, 5),
       decodeQueue(5, 5),
       renameQueue(5, 5),
       iewQueue(5, 5),
-
-      cpuXC(NULL),
+      activityRec(NumStages, 10, params->activity),
 
       globalSeqNum(1),
 
 #if FULL_SYSTEM
-      system(params.system),
+      system(params->system),
       memCtrl(system->memctrl),
       physmem(system->physmem),
-      itb(params.itb),
-      dtb(params.dtb),
-      mem(params.mem),
+      mem(params->mem),
 #else
-      // Hardcoded for a single thread!!
-      mem(params.workload[0]->getMemory()),
+//      pTable(params->pTable),
+      mem(params->workload[0]->getMemory()),
 #endif // FULL_SYSTEM
-
-      icacheInterface(params.icacheInterface),
-      dcacheInterface(params.dcacheInterface),
-      deferRegistration(params.defReg),
-      numInsts(0),
-      funcExeInst(0)
+      switchCount(0),
+      icacheInterface(params->icacheInterface),
+      dcacheInterface(params->dcacheInterface),
+      deferRegistration(params->deferRegistration),
+      numThreads(number_of_threads)
 {
     _status = Idle;
 
-#if !FULL_SYSTEM
-    thread.resize(this->number_of_threads);
-#endif
-
-    for (int i = 0; i < this->number_of_threads; ++i) {
+    if (params->checker) {
+        BaseCPU *temp_checker = params->checker;
+        checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
+        checker->setMemory(mem);
 #if FULL_SYSTEM
-        assert(i == 0);
-        thread[i] = new CPUExecContext(this, 0, system, itb, dtb, mem);
-        system->execContexts[i] = thread[i]->getProxy();
-
-        execContexts.push_back(system->execContexts[i]);
-#else
-        if (i < params.workload.size()) {
-            DPRINTF(FullCPU, "FullCPU: Workload[%i]'s starting PC is %#x, "
-                    "process is %#x",
-                    i, params.workload[i]->prog_entry, thread[i]);
-            thread[i] = new CPUExecContext(this, i, params.workload[i], i);
-        }
-        assert(params.workload[i]->getMemory() != NULL);
-        assert(mem != NULL);
-        execContexts.push_back(thread[i]->getProxy());
-#endif // !FULL_SYSTEM
+        checker->setSystem(params->system);
+#endif
+    } else {
+        checker = NULL;
     }
 
-    // Note that this is a hack so that my code which still uses xc-> will
-    // still work.  I should remove this eventually
-    cpuXC = thread[0];
+#if !FULL_SYSTEM
+    thread.resize(number_of_threads);
+    tids.resize(number_of_threads);
+#endif
 
-    // The stages also need their CPU pointer setup.  However this must be
-    // done at the upper level CPU because they have pointers to the upper
-    // level CPU, and not this FullO3CPU.
+    // The stages also need their CPU pointer setup.  However this
+    // must be done at the upper level CPU because they have pointers
+    // to the upper level CPU, and not this FullO3CPU.
+
+    // Set up Pointers to the activeThreads list for each stage
+    fetch.setActiveThreads(&activeThreads);
+    decode.setActiveThreads(&activeThreads);
+    rename.setActiveThreads(&activeThreads);
+    iew.setActiveThreads(&activeThreads);
+    commit.setActiveThreads(&activeThreads);
 
     // Give each of the stages the time buffer they will use.
     fetch.setTimeBuffer(&timeBuffer);
@@ -170,6 +171,7 @@ FullO3CPU<Impl>::FullO3CPU(Params &params)
     // Also setup each of the stages' queues.
     fetch.setFetchQueue(&fetchQueue);
     decode.setFetchQueue(&fetchQueue);
+    commit.setFetchQueue(&fetchQueue);
     decode.setDecodeQueue(&decodeQueue);
     rename.setDecodeQueue(&decodeQueue);
     rename.setRenameQueue(&renameQueue);
@@ -178,16 +180,85 @@ FullO3CPU<Impl>::FullO3CPU(Params &params)
     commit.setIEWQueue(&iewQueue);
     commit.setRenameQueue(&renameQueue);
 
+    commit.setFetchStage(&fetch);
+    commit.setIEWStage(&iew);
+    rename.setIEWStage(&iew);
+    rename.setCommitStage(&commit);
+
+#if !FULL_SYSTEM
+    int active_threads = params->workload.size();
+#else
+    int active_threads = 1;
+#endif
+
+    // Make sure that this is a valid architecture
+    assert(params->numPhysIntRegs   >= numThreads * TheISA::NumIntRegs);
+    assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
+
+    rename.setScoreboard(&scoreboard);
+    iew.setScoreboard(&scoreboard);
+
     // Setup the rename map for whichever stages need it.
-    rename.setRenameMap(&renameMap);
-    iew.setRenameMap(&renameMap);
+    PhysRegIndex lreg_idx = 0;
+    PhysRegIndex freg_idx = params->numPhysIntRegs; //Index to 1 after int regs
+
+    for (int tid=0; tid < numThreads; tid++) {
+        bool bindRegs = (tid <= active_threads - 1);
+
+        commitRenameMap[tid].init(TheISA::NumIntRegs,
+                                  params->numPhysIntRegs,
+                                  lreg_idx,            //Index for Logical. Regs
+
+                                  TheISA::NumFloatRegs,
+                                  params->numPhysFloatRegs,
+                                  freg_idx,            //Index for Float Regs
+
+                                  TheISA::NumMiscRegs,
+
+                                  TheISA::ZeroReg,
+                                  TheISA::ZeroReg,
+
+                                  tid,
+                                  false);
+
+        renameMap[tid].init(TheISA::NumIntRegs,
+                            params->numPhysIntRegs,
+                            lreg_idx,                  //Index for Logical. Regs
+
+                            TheISA::NumFloatRegs,
+                            params->numPhysFloatRegs,
+                            freg_idx,                  //Index for Float Regs
 
-    // Setup the free list for whichever stages need it.
+                            TheISA::NumMiscRegs,
+
+                            TheISA::ZeroReg,
+                            TheISA::ZeroReg,
+
+                            tid,
+                            bindRegs);
+    }
+
+    rename.setRenameMap(renameMap);
+    commit.setRenameMap(commitRenameMap);
+
+    // Give renameMap & rename stage access to the freeList;
+    for (int i=0; i < numThreads; i++) {
+        renameMap[i].setFreeList(&freeList);
+    }
     rename.setFreeList(&freeList);
-    renameMap.setFreeList(&freeList);
+
+    // Setup the page table for whichever stages need it.
+#if !FULL_SYSTEM
+//    fetch.setPageTable(pTable);
+//    iew.setPageTable(pTable);
+#endif
 
     // Setup the ROB for whichever stages need it.
     commit.setROB(&rob);
+
+    lastRunningCycle = curTick;
+
+    contextSwitch = false;
 }
 
 template <class Impl>
@@ -199,7 +270,58 @@ template <class Impl>
 void
 FullO3CPU<Impl>::fullCPURegStats()
 {
+    BaseFullCPU::regStats();
+
     // Register any of the FullCPU's stats here.
+    timesIdled
+        .name(name() + ".timesIdled")
+        .desc("Number of times that the entire CPU went into an idle state and"
+              " unscheduled itself")
+        .prereq(timesIdled);
+
+    idleCycles
+        .name(name() + ".idleCycles")
+        .desc("Total number of cycles that the CPU has spent unscheduled due "
+              "to idling")
+        .prereq(idleCycles);
+
+    // Number of Instructions simulated
+    // --------------------------------
+    // Should probably be in Base CPU but need templated
+    // MaxThreads so put in here instead
+    committedInsts
+        .init(numThreads)
+        .name(name() + ".committedInsts")
+        .desc("Number of Instructions Simulated");
+
+    totalCommittedInsts
+        .name(name() + ".committedInsts_total")
+        .desc("Number of Instructions Simulated");
+
+    cpi
+        .name(name() + ".cpi")
+        .desc("CPI: Cycles Per Instruction")
+        .precision(6);
+    cpi = simTicks / committedInsts;
+
+    totalCpi
+        .name(name() + ".cpi_total")
+        .desc("CPI: Total CPI of All Threads")
+        .precision(6);
+    totalCpi = simTicks / totalCommittedInsts;
+
+    ipc
+        .name(name() + ".ipc")
+        .desc("IPC: Instructions Per Cycle")
+        .precision(6);
+    ipc =  committedInsts / simTicks;
+
+    totalIpc
+        .name(name() + ".ipc_total")
+        .desc("IPC: Total IPC of All Threads")
+        .precision(6);
+    totalIpc =  totalCommittedInsts / simTicks;
+
 }
 
 template <class Impl>
@@ -208,9 +330,11 @@ FullO3CPU<Impl>::tick()
 {
     DPRINTF(FullCPU, "\n\nFullCPU: Ticking main, FullO3CPU.\n");
 
-    //Tick each of the stages if they're actually running.
-    //Will want to figure out a way to unschedule itself if they're all
-    //going to be idle for a long time.
+    ++numCycles;
+
+//    activity = false;
+
+    //Tick each of the stages
     fetch.tick();
 
     decode.tick();
@@ -221,7 +345,11 @@ FullO3CPU<Impl>::tick()
 
     commit.tick();
 
-    // Now advance the time buffers, unless the stage is stalled.
+#if !FULL_SYSTEM
+    doContextSwitch();
+#endif
+
+    // Now advance the time buffers
     timeBuffer.advance();
 
     fetchQueue.advance();
@@ -229,99 +357,396 @@ FullO3CPU<Impl>::tick()
     renameQueue.advance();
     iewQueue.advance();
 
-    if (_status == Running && !tickEvent.scheduled())
-        tickEvent.schedule(curTick + 1);
+    activityRec.advance();
+
+    if (removeInstsThisCycle) {
+        cleanUpRemovedInsts();
+    }
+
+    if (!tickEvent.scheduled()) {
+        if (_status == SwitchedOut) {
+            // increment stat
+            lastRunningCycle = curTick;
+        } else if (!activityRec.active()) {
+            lastRunningCycle = curTick;
+            timesIdled++;
+        } else {
+            tickEvent.schedule(curTick + cycles(1));
+        }
+    }
+
+#if !FULL_SYSTEM
+    updateThreadPriority();
+#endif
+
 }
 
 template <class Impl>
 void
 FullO3CPU<Impl>::init()
 {
-    if(!deferRegistration)
-    {
-        this->registerExecContexts();
+    if (!deferRegistration) {
+        registerExecContexts();
+    }
 
-        // Need to do a copy of the xc->regs into the CPU's regfile so
-        // that it can start properly.
+    // Set inSyscall so that the CPU doesn't squash when initially
+    // setting up registers.
+    for (int i = 0; i < number_of_threads; ++i)
+        thread[i]->inSyscall = true;
+
+    for (int tid=0; tid < number_of_threads; tid++) {
 #if FULL_SYSTEM
-        ExecContext *src_xc = system->execContexts[0];
-        TheISA::initCPU(src_xc, src_xc->readCpuId());
+        ExecContext *src_xc = execContexts[tid];
 #else
-        ExecContext *src_xc = thread[0]->getProxy();
+        ExecContext *src_xc = thread[tid]->getXCProxy();
 #endif
-        // First loop through the integer registers.
-        for (int i = 0; i < TheISA::NumIntRegs; ++i)
-        {
-            regFile.intRegFile[i] = src_xc->readIntReg(i);
+        // Threads start in the Suspended State
+        if (src_xc->status() != ExecContext::Suspended) {
+            continue;
         }
 
-        // Then loop through the floating point registers.
-        for (int i = 0; i < TheISA::NumFloatRegs; ++i)
-        {
-            regFile.floatRegFile.setRegBits(i, src_xc->readRegBits(i))
-        }
-/*
-        // Then loop through the misc registers.
-        regFile.miscRegs.fpcr = src_xc->regs.miscRegs.fpcr;
-        regFile.miscRegs.uniq = src_xc->regs.miscRegs.uniq;
-        regFile.miscRegs.lock_flag = src_xc->regs.miscRegs.lock_flag;
-        regFile.miscRegs.lock_addr = src_xc->regs.miscRegs.lock_addr;
-*/
-        // Then finally set the PC and the next PC.
-        regFile.pc = src_xc->readPC();
-        regFile.npc = src_xc->readNextPC();
+#if FULL_SYSTEM
+        TheISA::initCPU(src_xc, src_xc->readCpuId());
+#endif
+    }
+
+    // Clear inSyscall.
+    for (int i = 0; i < number_of_threads; ++i)
+        thread[i]->inSyscall = false;
+
+    // Initialize stages.
+    fetch.initStage();
+    iew.initStage();
+    rename.initStage();
+    commit.initStage();
+
+    commit.setThreads(thread);
+}
+
+// Hook for bringing thread `tid` into the CPU.  The register binding,
+// PC setup and ROB/IQ/LSQ reset below are currently compiled out
+// (#if 0), so at present this only emits a trace message.
+template <class Impl>
+void
+FullO3CPU<Impl>::insertThread(unsigned tid)
+{
+    // tid must be passed explicitly: the format string consumes one %i.
+    DPRINTF(FullCPU,"[tid:%i] Initializing thread data\n", tid);
+    // Will change now that the PC and thread state is internal to the CPU
+    // and not in the CPUExecContext.
+#if 0
+#if FULL_SYSTEM
+    ExecContext *src_xc = system->execContexts[tid];
+#else
+    CPUExecContext *src_xc = thread[tid];
+#endif
+
+    //Bind Int Regs to Rename Map
+    for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
+        PhysRegIndex phys_reg = freeList.getIntReg();
+
+        renameMap[tid].setEntry(ireg,phys_reg);
+        scoreboard.setReg(phys_reg);
+    }
+
+    //Bind Float Regs to Rename Map
+    for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
+        PhysRegIndex phys_reg = freeList.getFloatReg();
+
+        renameMap[tid].setEntry(freg,phys_reg);
+        scoreboard.setReg(phys_reg);
+    }
+
+    //Copy Thread Data Into RegFile
+    this->copyFromXC(tid);
+
+    //Set PC/NPC
+    regFile.pc[tid]  = src_xc->readPC();
+    regFile.npc[tid] = src_xc->readNextPC();
+
+    src_xc->setStatus(ExecContext::Active);
+
+    activateContext(tid,1);
+
+    //Reset ROB/IQ/LSQ Entries
+    commit.rob->resetEntries();
+    iew.resetEntries();
+#endif
+}
+
+// Hook for removing thread `tid` from the CPU.  The register unbinding,
+// pipeline squash and ROB/IQ/LSQ reset below are currently compiled out
+// (#if 0), so at present this only emits a trace message.
+template <class Impl>
+void
+FullO3CPU<Impl>::removeThread(unsigned tid)
+{
+    // tid must be passed explicitly: the format string consumes one %i.
+    DPRINTF(FullCPU,"[tid:%i] Removing thread data\n", tid);
+#if 0
+    //Unbind Int Regs from Rename Map
+    for (int ireg = 0; ireg < TheISA::NumIntRegs; ireg++) {
+        PhysRegIndex phys_reg = renameMap[tid].lookup(ireg);
+
+        scoreboard.unsetReg(phys_reg);
+        freeList.addReg(phys_reg);
+    }
+
+    //Unbind Float Regs from Rename Map
+    for (int freg = 0; freg < TheISA::NumFloatRegs; freg++) {
+        PhysRegIndex phys_reg = renameMap[tid].lookup(freg);
+
+        scoreboard.unsetReg(phys_reg);
+        freeList.addReg(phys_reg);
+    }
+
+    //Copy Thread Data From RegFile
+    /* Fix Me:
+     * Do we really need to do this if we are removing a thread
+     * in the sense that it's finished (exiting)? If the thread is just
+     * being suspended we might...
+     */
+//    this->copyToXC(tid);
+
+    //Squash Throughout Pipeline
+    fetch.squash(0,tid);
+    decode.squash(tid);
+    rename.squash(tid);
+
+    assert(iew.ldstQueue.getCount(tid) == 0);
+
+    //Reset ROB/IQ/LSQ Entries
+    if (activeThreads.size() >= 1) {
+        commit.rob->resetEntries();
+        iew.resetEntries();
+    }
+#endif
+}
+
+
+// Checks whether enough physical registers and ROB/IQ/LSQ entries are
+// free to bring thread `tid` in.  If so, the thread is inserted,
+// contextSwitch is cleared and the thread is removed from cpuWaitList.
+// Otherwise the thread is suspended, contextSwitch is set and the
+// thread is appended to cpuWaitList.
+template <class Impl>
+void
+FullO3CPU<Impl>::activateWhenReady(int tid)
+{
+    DPRINTF(FullCPU,"[tid:%i]: Checking if resources are available for incoming"
+            "(e.g. PhysRegs/ROB/IQ/LSQ) \n",
+            tid);
+
+    bool ready = true;
+
+    // Each test must fire when a resource is *short*, i.e. when fewer
+    // entries are free than the incoming thread requires.
+    if (freeList.numFreeIntRegs() < TheISA::NumIntRegs) {
+        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+                "Phys. Int. Regs.\n",
+                tid);
+        ready = false;
+    } else if (freeList.numFreeFloatRegs() < TheISA::NumFloatRegs) {
+        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+                "Phys. Float. Regs.\n",
+                tid);
+        ready = false;
+    } else if (commit.rob->numFreeEntries() <
+               commit.rob->entryAmount(activeThreads.size() + 1)) {
+        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+                "ROB entries.\n",
+                tid);
+        ready = false;
+    } else if (iew.instQueue.numFreeEntries() <
+               iew.instQueue.entryAmount(activeThreads.size() + 1)) {
+        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+                "IQ entries.\n",
+                tid);
+        ready = false;
+    } else if (iew.ldstQueue.numFreeEntries() <
+               iew.ldstQueue.entryAmount(activeThreads.size() + 1)) {
+        DPRINTF(FullCPU,"[tid:%i] Suspending thread due to not enough "
+                "LSQ entries.\n",
+                tid);
+        ready = false;
+    }
+
+    if (ready) {
+        insertThread(tid);
+
+        contextSwitch = false;
+
+        cpuWaitList.remove(tid);
+    } else {
+        suspendContext(tid);
+
+        //blocks fetch
+        contextSwitch = true;
+
+        //do waitlist
+        cpuWaitList.push_back(tid);
     }
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::activateContext(int thread_num, int delay)
+FullO3CPU<Impl>::activateContext(int tid, int delay)
 {
     // Needs to set each stage to running as well.
+    list<unsigned>::iterator isActive = find(
+        activeThreads.begin(), activeThreads.end(), tid);
+
+    if (isActive == activeThreads.end()) {
+        //May Need to Re-code this if the delay variable is the
+        //delay needed for thread to activate
+        DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
+                tid);
+
+        activeThreads.push_back(tid);
+    }
+
+    assert(_status == Idle || _status == SwitchedOut);
 
     scheduleTickEvent(delay);
 
+    // Be sure to signal that there's some activity so the CPU doesn't
+    // deschedule itself.
+    activityRec.activity();
+    fetch.wakeFromQuiesce();
+
     _status = Running;
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::suspendContext(int thread_num)
+FullO3CPU<Impl>::suspendContext(int tid)
+{
+    DPRINTF(FullCPU,"[tid: %i]: Suspended ...\n", tid);
+    unscheduleTickEvent();
+    _status = Idle;
+/*
+    //Remove From Active List, if Active
+    list<unsigned>::iterator isActive = find(
+        activeThreads.begin(), activeThreads.end(), tid);
+
+    if (isActive != activeThreads.end()) {
+        DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
+                tid);
+        activeThreads.erase(isActive);
+    }
+*/
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::deallocateContext(int tid)
 {
-    panic("suspendContext unimplemented!");
+    DPRINTF(FullCPU,"[tid:%i]: Deallocating ...", tid);
+/*
+    //Remove From Active List, if Active
+    list<unsigned>::iterator isActive = find(
+        activeThreads.begin(), activeThreads.end(), tid);
+
+    if (isActive != activeThreads.end()) {
+        DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
+                tid);
+        activeThreads.erase(isActive);
+
+        removeThread(tid);
+    }
+*/
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::deallocateContext(int thread_num)
+FullO3CPU<Impl>::haltContext(int tid)
 {
-    panic("deallocateContext unimplemented!");
+    DPRINTF(FullCPU,"[tid:%i]: Halted ...", tid);
+/*
+    //Remove From Active List, if Active
+    list<unsigned>::iterator isActive = find(
+        activeThreads.begin(), activeThreads.end(), tid);
+
+    if (isActive != activeThreads.end()) {
+        DPRINTF(FullCPU,"[tid:%i]: Removing from active threads list\n",
+                tid);
+        activeThreads.erase(isActive);
+
+        removeThread(tid);
+    }
+*/
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::haltContext(int thread_num)
+FullO3CPU<Impl>::switchOut(Sampler *_sampler)
 {
-    panic("haltContext unimplemented!");
+    sampler = _sampler;
+    switchCount = 0;
+    fetch.switchOut();
+    decode.switchOut();
+    rename.switchOut();
+    iew.switchOut();
+    commit.switchOut();
+
+    // Wake the CPU and record activity so everything can drain out if
+    // the CPU is currently idle.
+    wakeCPU();
+    activityRec.activity();
 }
 
+// Called once per pipeline stage as it finishes switching out.  When
+// the NumStages-th signal arrives, the stages' doSwitchOut() hooks run,
+// the instruction and remove lists are emptied, the checker (if any)
+// and the sampler are notified, any pending tick event is squashed and
+// the CPU status becomes SwitchedOut.
 template <class Impl>
 void
-FullO3CPU<Impl>::switchOut()
+FullO3CPU<Impl>::signalSwitched()
 {
-    panic("FullO3CPU does not have a switch out function.\n");
+    if (++switchCount == NumStages) {
+        fetch.doSwitchOut();
+        rename.doSwitchOut();
+        commit.doSwitchOut();
+        instList.clear();
+        while (!removeList.empty()) {
+            removeList.pop();
+        }
+
+        if (checker)
+            checker->switchOut(sampler);
+
+        if (tickEvent.scheduled())
+            tickEvent.squash();
+        sampler->signalSwitched();
+        _status = SwitchedOut;
+    }
+    // Bound by the same constant the completion test uses rather than
+    // a hard-coded stage count.
+    assert(switchCount <= NumStages);
 }
 
 template <class Impl>
 void
 FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
 {
+    // Flush out any old data from the time buffers.
+    for (int i = 0; i < 10; ++i) {
+        timeBuffer.advance();
+        fetchQueue.advance();
+        decodeQueue.advance();
+        renameQueue.advance();
+        iewQueue.advance();
+    }
+
+    activityRec.reset();
+
     BaseCPU::takeOverFrom(oldCPU);
 
+    fetch.takeOverFrom();
+    decode.takeOverFrom();
+    rename.takeOverFrom();
+    iew.takeOverFrom();
+    commit.takeOverFrom();
+
     assert(!tickEvent.scheduled());
 
-    // Set all status's to active, schedule the
-    // CPU's tick event.
+    // @todo: Figure out how to properly select the tid to put onto
+    // the active threads list.
+    int tid = 0;
+
+    list<unsigned>::iterator isActive = find(
+        activeThreads.begin(), activeThreads.end(), tid);
+
+    if (isActive == activeThreads.end()) {
+        //May Need to Re-code this if the delay variable is the delay
+        //needed for thread to activate
+        DPRINTF(FullCPU, "Adding Thread %i to active threads list\n",
+                tid);
+
+        activeThreads.push_back(tid);
+    }
+
+    // Set all statuses to active, schedule the CPU's tick event.
+    // @todo: Fix up statuses so this is handled properly
     for (int i = 0; i < execContexts.size(); ++i) {
         ExecContext *xc = execContexts[i];
         if (xc->status() == ExecContext::Active && _status != Running) {
@@ -329,14 +754,8 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
             tickEvent.schedule(curTick);
         }
     }
-}
-
-template <class Impl>
-InstSeqNum
-FullO3CPU<Impl>::getAndIncrementInstSeq()
-{
-    // Hopefully this works right.
-    return globalSeqNum++;
+    if (!tickEvent.scheduled())
+        tickEvent.schedule(curTick);
 }
 
 template <class Impl>
@@ -363,7 +782,6 @@ FullO3CPU<Impl>::readFloatReg(int reg_idx)
+// Forwards to regFile.readFloatRegBits() for register reg_idx at the
+// requested width.
 template <class Impl>
 FloatRegBits
 FullO3CPU<Impl>::readFloatRegBits(int reg_idx, int width)
 {
     return regFile.readFloatRegBits(reg_idx, width);
 }
 
@@ -411,156 +829,368 @@ FullO3CPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
 
 template <class Impl>
 uint64_t
-FullO3CPU<Impl>::readPC()
+FullO3CPU<Impl>::readArchIntReg(int reg_idx, unsigned tid)
+{
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+
+    return regFile.readIntReg(phys_reg);
+}
+
+template <class Impl>
+float
+FullO3CPU<Impl>::readArchFloatRegSingle(int reg_idx, unsigned tid)
+{
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+    return regFile.readFloatRegSingle(phys_reg);
+}
+
+template <class Impl>
+double
+FullO3CPU<Impl>::readArchFloatRegDouble(int reg_idx, unsigned tid)
+{
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+    return regFile.readFloatRegDouble(phys_reg);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, unsigned tid)
 {
-    return regFile.readPC();
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+    return regFile.readFloatRegInt(phys_reg);
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::setNextPC(uint64_t val)
+FullO3CPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, unsigned tid)
 {
-    regFile.setNextPC(val);
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(reg_idx);
+
+    regFile.setIntReg(phys_reg, val);
 }
 
+// Writes a single-precision value to architectural FP register reg_idx
+// of thread tid, via the committed rename map.
 template <class Impl>
 void
-FullO3CPU<Impl>::setPC(Addr new_PC)
+FullO3CPU<Impl>::setArchFloatRegSingle(int reg_idx, float val, unsigned tid)
 {
-    regFile.setPC(new_PC);
+    // Apply the same FP_Base_DepTag offset that readArchFloatRegSingle
+    // uses, so reads and writes hit the same rename-map entry.
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+    regFile.setFloatRegSingle(phys_reg, val);
 }
 
+// Writes a double-precision value to architectural FP register reg_idx
+// of thread tid, via the committed rename map.
 template <class Impl>
 void
-FullO3CPU<Impl>::addInst(DynInstPtr &inst)
+FullO3CPU<Impl>::setArchFloatRegDouble(int reg_idx, double val, unsigned tid)
 {
-    instList.push_back(inst);
+    // Apply the same FP_Base_DepTag offset that readArchFloatRegDouble
+    // uses, so reads and writes hit the same rename-map entry.
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
+
+    regFile.setFloatRegDouble(phys_reg, val);
 }
 
+// Writes an integer bit pattern to architectural FP register reg_idx
+// of thread tid, via the committed rename map.
 template <class Impl>
 void
-FullO3CPU<Impl>::instDone()
+FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid)
 {
-    // Keep an instruction count.
-    numInsts++;
+    // Apply the same FP_Base_DepTag offset that readArchFloatRegInt
+    // uses, so reads and writes hit the same rename-map entry.
+    int idx = reg_idx + TheISA::FP_Base_DepTag;
+    PhysRegIndex phys_reg = commitRenameMap[tid].lookup(idx);
 
-    // Check for instruction-count-based events.
-    comInstEventQueue[0]->serviceEvents(numInsts);
+    regFile.setFloatRegInt(phys_reg, val);
+}
+
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readPC(unsigned tid)
+{
+    return commit.readPC(tid);
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::removeBackInst(DynInstPtr &inst)
+FullO3CPU<Impl>::setPC(Addr new_PC,unsigned tid)
 {
-    DynInstPtr inst_to_delete;
+    commit.setPC(new_PC, tid);
+}
 
-    // Walk through the instruction list, removing any instructions
-    // that were inserted after the given instruction, inst.
-    while (instList.back() != inst)
-    {
-        assert(!instList.empty());
+template <class Impl>
+uint64_t
+FullO3CPU<Impl>::readNextPC(unsigned tid)
+{
+    return commit.readNextPC(tid);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setNextPC(uint64_t val,unsigned tid)
+{
+    commit.setNextPC(val, tid);
+}
+
+template <class Impl>
+typename FullO3CPU<Impl>::ListIt
+FullO3CPU<Impl>::addInst(DynInstPtr &inst)
+{
+    instList.push_back(inst);
 
-        // Obtain the pointer to the instruction.
-        inst_to_delete = instList.back();
+    return --(instList.end());
+}
 
-        DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n",
-                inst_to_delete->seqNum, inst_to_delete->readPC());
+template <class Impl>
+void
+FullO3CPU<Impl>::instDone(unsigned tid)
+{
+    // Keep an instruction count.
+    thread[tid]->numInst++;
+    thread[tid]->numInsts++;
+    committedInsts[tid]++;
+    totalCommittedInsts++;
 
-        // Remove the instruction from the list.
-        instList.pop_back();
+    // Check for instruction-count-based events.
+    comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
+}
 
-        // Mark it as squashed.
-        inst_to_delete->setSquashed();
-    }
+template <class Impl>
+void
+FullO3CPU<Impl>::addToRemoveList(DynInstPtr &inst)
+{
+    removeInstsThisCycle = true;
+
+    removeList.push(inst->getInstListIt());
 }
 
 template <class Impl>
 void
 FullO3CPU<Impl>::removeFrontInst(DynInstPtr &inst)
 {
-    DynInstPtr inst_to_remove;
+    DPRINTF(FullCPU, "FullCPU: Removing committed instruction [tid:%i] PC %#x "
+            "[sn:%lli]\n",
+            inst->threadNumber, inst->readPC(), inst->seqNum);
 
-    // The front instruction should be the same one being asked to be removed.
-    assert(instList.front() == inst);
+    removeInstsThisCycle = true;
 
     // Remove the front instruction.
-    inst_to_remove = inst;
-    instList.pop_front();
-
-    DPRINTF(FullCPU, "FullCPU: Removing committed instruction %#x, PC %#x\n",
-            inst_to_remove, inst_to_remove->readPC());
+    removeList.push(inst->getInstListIt());
 }
 
 template <class Impl>
 void
-FullO3CPU<Impl>::removeInstsNotInROB()
+FullO3CPU<Impl>::removeInstsNotInROB(unsigned tid)
 {
-    DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
-            "list.\n");
+    DPRINTF(FullCPU, "FullCPU: Thread %i: Deleting instructions from instruction"
+            " list.\n", tid);
+
+    ListIt end_it;
+
+    bool rob_empty = false;
+
+    if (instList.empty()) {
+        return;
+    } else if (rob.isEmpty(/*tid*/)) {
+        DPRINTF(FullCPU, "FullCPU: ROB is empty, squashing all insts.\n");
+        end_it = instList.begin();
+        rob_empty = true;
+    } else {
+        end_it = (rob.readTailInst(tid))->getInstListIt();
+        DPRINTF(FullCPU, "FullCPU: ROB is not empty, squashing insts not in ROB.\n");
+    }
+
+    removeInstsThisCycle = true;
+
+    ListIt inst_it = instList.end();
+
+    inst_it--;
+
+    // Walk through the instruction list, removing any instructions
+    // that were inserted after the given instruction iterator, end_it.
+    while (inst_it != end_it) {
+        assert(!instList.empty());
 
-    DynInstPtr rob_tail = rob.readTailInst();
+        squashInstIt(inst_it, tid);
 
-    removeBackInst(rob_tail);
+        inst_it--;
+    }
+
+    // If the ROB was empty, then we actually need to remove the first
+    // instruction as well.
+    if (rob_empty) {
+        squashInstIt(inst_it, tid);
+    }
 }
 
+// Walks instList from the youngest entry backwards and squashes (via
+// squashInstIt) every instruction whose sequence number is greater than
+// seq_num.  squashInstIt only acts on instructions belonging to tid.
+// The list must not be empty.
 template <class Impl>
 void
-FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num)
+FullO3CPU<Impl>::removeInstsUntil(const InstSeqNum &seq_num,
+                                  unsigned tid)
 {
+    assert(!instList.empty());
+
+    removeInstsThisCycle = true;
+
+    ListIt inst_iter = instList.end();
+
+    inst_iter--;
+
     DPRINTF(FullCPU, "FullCPU: Deleting instructions from instruction "
-            "list.\n");
+            "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
+            tid, seq_num, (*inst_iter)->seqNum);
 
-    DynInstPtr inst_to_delete;
+    while ((*inst_iter)->seqNum > seq_num) {
 
-    while (instList.back()->seqNum > seq_num) {
-        assert(!instList.empty());
+        bool break_loop = (inst_iter == instList.begin());
 
-        // Obtain the pointer to the instruction.
-        inst_to_delete = instList.back();
+        squashInstIt(inst_iter, tid);
 
-        DPRINTF(FullCPU, "FullCPU: Removing instruction %i, PC %#x\n",
-                inst_to_delete->seqNum, inst_to_delete->readPC());
+        // Stop before stepping back: decrementing an iterator that
+        // already equals begin() is undefined behaviour.
+        if (break_loop)
+            break;
 
-        // Remove the instruction from the list.
-        instList.back() = NULL;
-        instList.pop_back();
+        inst_iter--;
+    }
+}
+
+/**
+ * Squashes the instruction referenced by instIt if it belongs to thread
+ * tid: the instruction is marked as squashed and its list iterator is
+ * pushed onto removeList. Instructions of other threads are left
+ * untouched.
+ *
+ * @param instIt Iterator into instList for the candidate instruction.
+ * @param tid Thread whose instructions may be squashed.
+ */
+template <class Impl>
+inline void
+FullO3CPU<Impl>::squashInstIt(const ListIt &instIt, const unsigned &tid)
+{
+    if ((*instIt)->threadNumber == tid) {
+        DPRINTF(FullCPU, "FullCPU: Squashing instruction, "
+                "[tid:%i] [sn:%lli] PC %#x\n",
+                (*instIt)->threadNumber,
+                (*instIt)->seqNum,
+                (*instIt)->readPC());
 
         // Mark it as squashed.
-        inst_to_delete->setSquashed();
-    }
+        (*instIt)->setSquashed();
 
+        // @todo: Formulate a consistent method for deleting
+        // instructions from the instruction list
+        // Queue the instruction for removal; the entry stays in the
+        // instruction list until removeList is processed.
+        removeList.push(instIt);
+    }
 }
 
+/**
+ * Erases from instList every entry whose iterator is queued on
+ * removeList, draining the queue front to back, and then clears the
+ * removeInstsThisCycle flag.
+ */
+template <class Impl>
+void
+FullO3CPU<Impl>::cleanUpRemovedInsts()
+{
+    for (; !removeList.empty(); removeList.pop()) {
+        // The queued iterator stays valid until the pop in the loop
+        // header, which runs after the erase below.
+        const ListIt &doomed = removeList.front();
+
+        DPRINTF(FullCPU, "FullCPU: Removing instruction, "
+                "[tid:%i] [sn:%lli] PC %#x\n",
+                (*doomed)->threadNumber,
+                (*doomed)->seqNum,
+                (*doomed)->readPC());
+
+        instList.erase(doomed);
+    }
+
+    // Everything queued so far has been removed.
+    removeInstsThisCycle = false;
+}
+/*
 template <class Impl>
 void
 FullO3CPU<Impl>::removeAllInsts()
 {
     instList.clear();
 }
-
+*/
+/**
+ * Prints every entry of instList, front to back, through cprintf: a
+ * running index, the PC, thread number, sequence number, and the issued
+ * and squashed flags of each instruction.
+ */
 template <class Impl>
 void
 FullO3CPU<Impl>::dumpInsts()
 {
     int num = 0;
-    typename list<DynInstPtr>::iterator inst_list_it = instList.begin();
 
-    while (inst_list_it != instList.end())
-    {
-        cprintf("Instruction:%i\nPC:%#x\nSN:%lli\nIssued:%i\nSquashed:%i\n\n",
-                num, (*inst_list_it)->readPC(), (*inst_list_it)->seqNum,
-                (*inst_list_it)->isIssued(), (*inst_list_it)->isSquashed());
+    ListIt inst_list_it = instList.begin();
+
+    cprintf("Dumping Instruction List\n");
+
+    while (inst_list_it != instList.end()) {
+        cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
+                "Squashed:%i\n\n",
+                num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber,
+                (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
         inst_list_it++;
         ++num;
     }
 }
-
+/*
 template <class Impl>
 void
 FullO3CPU<Impl>::wakeDependents(DynInstPtr &inst)
 {
     iew.wakeDependents(inst);
 }
+*/
+/**
+ * Schedules the tick event for the current tick unless the CPU is
+ * already running, i.e. the activity recorder reports activity or the
+ * tick event is already scheduled. On wake-up the ticks elapsed since
+ * lastRunningCycle (up to the previous tick) are added to idleCycles.
+ */
+template <class Impl>
+void
+FullO3CPU<Impl>::wakeCPU()
+{
+    const bool already_running =
+        activityRec.active() || tickEvent.scheduled();
+
+    if (!already_running) {
+        DPRINTF(Activity, "Waking up CPU\n");
+
+        // Account for the time spent idle before this wake-up.
+        idleCycles += (curTick - 1) - lastRunningCycle;
+
+        tickEvent.schedule(curTick);
+        return;
+    }
+
+    DPRINTF(Activity, "CPU already running.\n");
+}
+
+/**
+ * Claims the lowest-numbered thread id whose tids[] slot is not set.
+ * The slot is marked as taken before returning.
+ *
+ * @return The claimed thread id, or -1 if all numThreads slots are set.
+ */
+template <class Impl>
+int
+FullO3CPU<Impl>::getFreeTid()
+{
+    int free_tid = -1;
+
+    for (int idx = 0; idx < numThreads; ++idx) {
+        if (tids[idx])
+            continue;
+
+        // First unused slot: reserve it and stop searching.
+        tids[idx] = true;
+        free_tid = idx;
+        break;
+    }
+
+    return free_tid;
+}
+
+/**
+ * While a context switch is pending (contextSwitch is set), calls
+ * activateWhenReady() for each index below the current size of
+ * cpuWaitList. Once cpuWaitList is empty the pending flag is cleared.
+ *
+ * NOTE(review): the flag used to be re-assigned to true here, which left
+ * it set forever; clearing it assumes the flag is raised again wherever
+ * threads are added to cpuWaitList -- confirm against those callers.
+ */
+template <class Impl>
+void
+FullO3CPU<Impl>::doContextSwitch()
+{
+    if (contextSwitch) {
+
+        //ADD CODE TO DEACTIVE THREAD HERE (???)
+
+        for (int tid=0; tid < cpuWaitList.size(); tid++) {
+            activateWhenReady(tid);
+        }
+
+        // Nothing is left waiting, so the context switch is complete.
+        if (cpuWaitList.size() == 0)
+            contextSwitch = false;
+    }
+}
+
+/**
+ * Rotates activeThreads by one position: the thread at the front of the
+ * list is moved to the back. Does nothing when fewer than two threads
+ * are active.
+ */
+template <class Impl>
+void
+FullO3CPU<Impl>::updateThreadPriority()
+{
+    if (activeThreads.size() > 1)
+    {
+        //DEFAULT TO ROUND ROBIN SCHEME
+        //e.g. Move highest priority to end of thread list
+        unsigned high_thread = activeThreads.front();
+
+        activeThreads.pop_front();
+
+        activeThreads.push_back(high_thread);
+    }
+}
 
 // Forward declaration of FullO3CPU.
 template class FullO3CPU<AlphaSimpleImpl>;