CPU: Add readBytes and writeBytes functions to the exec contexts.
[gem5.git] / src / cpu / inorder / cpu.cc
index c522fc23824abeab37650112a823980696c9b73f..059996b0723bc58b780ac1855fd64136975a4d90 100644 (file)
  *
  */
 
-#include "config/full_system.hh"
+#include <algorithm>
 
 #include "arch/utility.hh"
-#include "cpu/exetrace.hh"
+#include "config/full_system.hh"
+#include "config/the_isa.hh"
 #include "cpu/activity.hh"
-#include "cpu/simple_thread.hh"
-#include "cpu/thread_context.hh"
 #include "cpu/base.hh"
-#include "cpu/inorder/inorder_dyn_inst.hh"
-#include "cpu/inorder/thread_context.hh"
-#include "cpu/inorder/thread_state.hh"
+#include "cpu/exetrace.hh"
 #include "cpu/inorder/cpu.hh"
-#include "params/InOrderCPU.hh"
-#include "cpu/inorder/pipeline_traits.hh"
 #include "cpu/inorder/first_stage.hh"
-#include "cpu/inorder/resources/resource_list.hh"
+#include "cpu/inorder/inorder_dyn_inst.hh"
+#include "cpu/inorder/pipeline_traits.hh"
 #include "cpu/inorder/resource_pool.hh"
+#include "cpu/inorder/resources/resource_list.hh"
+#include "cpu/inorder/thread_context.hh"
+#include "cpu/inorder/thread_state.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/thread_context.hh"
 #include "mem/translating_port.hh"
+#include "params/InOrderCPU.hh"
 #include "sim/process.hh"
 #include "sim/stat_control.hh"
-#include <algorithm>
+
+#if FULL_SYSTEM
+#include "cpu/quiesce_event.hh"
+#include "sim/system.hh"
+#endif
+
+#if THE_ISA == ALPHA_ISA
+#include "arch/alpha/osfpal.hh"
+#endif
 
 using namespace std;
 using namespace TheISA;
@@ -74,25 +84,25 @@ InOrderCPU::TickEvent::description()
 }
 
+// Build a CPU event of kind e_type for thread _tid. The event is queued
+// with priority CPU_Tick_Pri plus event_pri_offset; the event type, fault,
+// thread id and instruction are recorded through setEvent().
 InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type,
-                             Fault fault, unsigned _tid, unsigned _vpe)
-    : Event(CPU_Tick_Pri), cpu(_cpu)
+                               Fault fault, ThreadID _tid, DynInstPtr inst,
+                               unsigned event_pri_offset)
+    : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)),
+      cpu(_cpu)
 {
-    setEvent(e_type, fault, _tid, _vpe);
+    setEvent(e_type, fault, _tid, inst);
 }
 
 
+// Printable names of the CPU events, used in debug and fatal messages
+// where the table is indexed by a CPUEventType value.
+// NOTE(review): the entries presumably have to stay in the same order as
+// the CPUEventType enum, which is declared outside this file -- confirm.
 std::string InOrderCPU::eventNames[NumCPUEvents] =
 {
     "ActivateThread",
-    "DeallocateThread",
+    "ActivateNextReadyThread",
+    "DeactivateThread",
+    "HaltThread",
     "SuspendThread",
-    "DisableThreads",
-    "EnableThreads",
-    "DisableVPEs",
-    "EnableVPEs",
     "Trap",
     "InstGraduated",
-    "SquashAll",
+    "SquashFromMemStall",
     "UpdatePCs"
 };
 
@@ -105,28 +115,24 @@ InOrderCPU::CPUEvent::process()
         cpu->activateThread(tid);
         break;
 
-      //@TODO: Consider Implementing "Suspend Thread" as Separate from Deallocate
-      case SuspendThread: // Suspend & Deallocate are same for now.
-        //cpu->suspendThread(tid);
-        //break;
-      case DeallocateThread:
-        cpu->deallocateThread(tid);
+      case ActivateNextReadyThread:
+        cpu->activateNextReadyThread();
         break;
 
-      case EnableVPEs:
-        cpu->enableVPEs(vpe);
+      case DeactivateThread:
+        cpu->deactivateThread(tid);
         break;
 
-      case DisableVPEs:
-        cpu->disableVPEs(tid, vpe);
+      case HaltThread:
+        cpu->haltThread(tid);
         break;
 
-      case EnableThreads:
-        cpu->enableThreads(vpe);
+      case SuspendThread: 
+        cpu->suspendThread(tid);
         break;
 
-      case DisableThreads:
-        cpu->disableThreads(tid, vpe);
+      case SquashFromMemStall:
+        cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid);
         break;
 
       case Trap:
@@ -134,12 +140,14 @@ InOrderCPU::CPUEvent::process()
         break;
 
       default:
-        fatal("Unrecognized Event Type %d", cpuEventType);
+        fatal("Unrecognized Event Type %s", eventNames[cpuEventType]);    
     }
-
+    
     cpu->cpuEventRemoveList.push(this);
 }
 
+    
+
 const char *
 InOrderCPU::CPUEvent::description()
 {
@@ -150,9 +158,11 @@ void
 InOrderCPU::CPUEvent::scheduleEvent(int delay)
 {
     if (squashed())
-      mainEventQueue.reschedule(this,curTick + cpu->ticks(delay));
+        mainEventQueue.reschedule(this, cpu->nextCycle(curTick +
+                                                       cpu->ticks(delay)));
     else if (!scheduled())
-      mainEventQueue.schedule(this,curTick + cpu->ticks(delay));
+        mainEventQueue.schedule(this, cpu->nextCycle(curTick +
+                                                     cpu->ticks(delay)));
 }
 
 void
@@ -168,16 +178,23 @@ InOrderCPU::InOrderCPU(Params *params)
       coreType("default"),
       _status(Idle),
       tickEvent(this),
-      miscRegFile(this),
       timeBuffer(2 , 2),
       removeInstsThisCycle(false),
       activityRec(params->name, NumStages, 10, params->activity),
+#if FULL_SYSTEM
+      system(params->system),
+      physmem(system->physmem),
+#endif // FULL_SYSTEM
+#ifdef DEBUG
+      cpuEventNum(0),
+      resReqCount(0),
+#endif // DEBUG
       switchCount(0),
       deferRegistration(false/*params->deferRegistration*/),
       stageTracing(params->stageTracing),
-      numThreads(params->numThreads),
-      numVirtProcs(1)
-{
+      instsPerSwitch(0)
+{    
+    ThreadID active_threads;
     cpu_params = params;
 
     resPool = new ResourcePool(this, params);
@@ -185,7 +202,10 @@ InOrderCPU::InOrderCPU(Params *params)
     // Resize for Multithreading CPUs
     thread.resize(numThreads);
 
-    int active_threads = params->workload.size();
+#if FULL_SYSTEM
+    active_threads = 1;
+#else
+    active_threads = params->workload.size();
 
     if (active_threads > MaxThreads) {
         panic("Workload Size too large. Increase the 'MaxThreads'"
@@ -193,6 +213,25 @@ InOrderCPU::InOrderCPU(Params *params)
               "edit your workload size.");
     }
 
+    
+    if (active_threads > 1) {
+        threadModel = (InOrderCPU::ThreadModel) params->threadModel;
+
+        if (threadModel == SMT) {
+            DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");            
+        } else if (threadModel == SwitchOnCacheMiss) {
+            DPRINTF(InOrderCPU, "Setting Thread Model to "
+                    "Switch On Cache Miss\n");
+        }
+        
+    } else {
+        threadModel = Single;
+    }
+     
+        
+    
+#endif
+
     // Bind the fetch & data ports from the resource pool.
     fetchPortIdx = resPool->getPortIdx(params->fetchMemPort);
     if (fetchPortIdx == 0) {
@@ -204,40 +243,37 @@ InOrderCPU::InOrderCPU(Params *params)
         fatal("Unable to find port for data.\n");
     }
 
-
-    // Hard-Code Bindings to ITB & DTB
-    itbIdx = resPool->getResIdx(name() + "."  + "I-TLB");
-    if (itbIdx == 0) {
-        fatal("Unable to find ITB resource.\n");
-    }
-
-    dtbIdx = resPool->getResIdx(name() + "."  + "D-TLB");
-    if (dtbIdx == 0) {
-        fatal("Unable to find DTB resource.\n");
-    }
-
-    for (int i = 0; i < numThreads; ++i) {
-        if (i < params->workload.size()) {
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+#if FULL_SYSTEM
+        // SMT is not supported in FS mode yet.
+        assert(numThreads == 1);
+        thread[tid] = new Thread(this, 0);
+#else
+        if (tid < (ThreadID)params->workload.size()) {
             DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
-                    i, this->thread[i]);
-            this->thread[i] = new Thread(this, i, params->workload[i],
-                                         i);
+                    tid, params->workload[tid]->prog_fname);
+            thread[tid] =
+                new Thread(this, tid, params->workload[tid]);
         } else {
             //Allocate Empty thread so M5 can use later
             //when scheduling threads to CPU
             Process* dummy_proc = params->workload[0];
-            this->thread[i] = new Thread(this, i, dummy_proc, i);
+            thread[tid] = new Thread(this, tid, dummy_proc);
         }
+        
+        // Eventually set this with parameters...
+        asid[tid] = tid;
+#endif
 
         // Setup the TC that will serve as the interface to the threads/CPU.
         InOrderThreadContext *tc = new InOrderThreadContext;
         tc->cpu = this;
-        tc->thread = this->thread[i];
+        tc->thread = thread[tid];
 
         // Give the thread the TC.
-        thread[i]->tc = tc;
-        thread[i]->setFuncExeInst(0);
-        globalSeqNum[i] = 1;
+        thread[tid]->tc = tc;
+        thread[tid]->setFuncExeInst(0);
+        globalSeqNum[tid] = 1;
 
         // Add the TC to the CPU's list of TC's.
         this->threadContexts.push_back(tc);
@@ -269,7 +305,7 @@ InOrderCPU::InOrderCPU(Params *params)
     }
 
     // Initialize thread specific variables
-    for (int tid=0; tid < numThreads; tid++) {
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
         archRegDepMap[tid].setCPU(this);
 
         nonSpecInstActive[tid] = false;
@@ -278,23 +314,34 @@ InOrderCPU::InOrderCPU(Params *params)
         squashSeqNum[tid] = MaxAddr;
         lastSquashCycle[tid] = 0;
 
-        intRegFile[tid].clear();
-        floatRegFile[tid].clear();
+        memset(intRegs[tid], 0, sizeof(intRegs[tid]));
+        memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid]));
+        isa[tid].clear();
+
+        isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/);
+
+        // Define dummy instructions and resource requests to be used.
+        dummyInst[tid] = new InOrderDynInst(this, 
+                                            thread[tid], 
+                                            0, 
+                                            tid, 
+                                            asid[tid]);
+
+        dummyReq[tid] = new ResourceRequest(resPool->getResource(0), 
+                                            dummyInst[tid], 
+                                            0, 
+                                            0, 
+                                            0, 
+                                            0);        
     }
 
-    // Update miscRegFile if necessary
-    if (numThreads > 1) {
-        miscRegFile.expandForMultithreading(numThreads, numVirtProcs);
-    }
-
-    miscRegFile.clear();
+    dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0);
+    dummyReqInst->setSquashed();
 
+    dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0);
+    dummyBufferInst->setSquashed();
+    
     lastRunningCycle = curTick;
-    contextSwitch = false;
-
-    // Define dummy instructions and resource requests to be used.
-    DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0);
-    dummyReq = new ResourceRequest(NULL, NULL, 0, 0, 0, 0);
 
     // Reset CPU to reset state.
 #if FULL_SYSTEM
@@ -304,10 +351,17 @@ InOrderCPU::InOrderCPU(Params *params)
     reset();
 #endif
 
+    dummyBufferInst->resetInstCount();
+    
     // Schedule First Tick Event, CPU will reschedule itself from here on out.
     scheduleTickEvent(0);
 }
 
+// Destructor: frees the resource pool, which is the only object this
+// destructor releases.
+InOrderCPU::~InOrderCPU()
+{
+    delete this->resPool;
+}
+
 
 void
 InOrderCPU::regStats()
@@ -315,7 +369,49 @@ InOrderCPU::regStats()
     /* Register the Resource Pool's stats here.*/
     resPool->regStats();
 
+    /* Register for each Pipeline Stage */
+    for (int stage_num=0; stage_num < ThePipeline::NumStages; stage_num++) {
+        pipelineStage[stage_num]->regStats();
+    }
+
     /* Register any of the InOrderCPU's stats here.*/
+    instsPerCtxtSwitch
+        .name(name() + ".instsPerContextSwitch")
+        .desc("Instructions Committed Per Context Switch")
+        .prereq(instsPerCtxtSwitch);
+    
+    numCtxtSwitches
+        .name(name() + ".contextSwitches")
+        .desc("Number of context switches");
+
+    comLoads
+        .name(name() + ".comLoads")
+        .desc("Number of Load instructions committed");
+
+    comStores
+        .name(name() + ".comStores")
+        .desc("Number of Store instructions committed");
+
+    comBranches
+        .name(name() + ".comBranches")
+        .desc("Number of Branches instructions committed");
+
+    comNops
+        .name(name() + ".comNops")
+        .desc("Number of Nop instructions committed");
+
+    comNonSpec
+        .name(name() + ".comNonSpec")
+        .desc("Number of Non-Speculative instructions committed");
+
+    comInts
+        .name(name() + ".comInts")
+        .desc("Number of Integer instructions committed");
+
+    comFloats
+        .name(name() + ".comFloats")
+        .desc("Number of Floating Point instructions committed");
+            
     timesIdled
         .name(name() + ".timesIdled")
         .desc("Number of times that the entire CPU went into an idle state and"
@@ -324,9 +420,17 @@ InOrderCPU::regStats()
 
     idleCycles
         .name(name() + ".idleCycles")
-        .desc("Total number of cycles that the CPU has spent unscheduled due "
-              "to idling")
-        .prereq(idleCycles);
+        .desc("Number of cycles cpu's stages were not processed");
+
+    runCycles
+        .name(name() + ".runCycles")
+        .desc("Number of cycles cpu stages are processed.");
+
+    activity
+        .name(name() + ".activity")
+        .desc("Percentage of cycles cpu is active")
+        .precision(6);
+    activity = (runCycles / numCycles) * 100;
 
     threadCycles
         .init(numThreads)
@@ -335,7 +439,7 @@ InOrderCPU::regStats()
 
     smtCycles
         .name(name() + ".smtCycles")
-        .desc("Total number of cycles that the CPU was simultaneous multithreading.(SMT)");
+        .desc("Total number of cycles that the CPU was in SMT-mode");
 
     committedInsts
         .init(numThreads)
@@ -355,7 +459,7 @@ InOrderCPU::regStats()
         .name(name() + ".cpi")
         .desc("CPI: Cycles Per Instruction (Per-Thread)")
         .precision(6);
-    cpi = threadCycles / committedInsts;
+    cpi = numCycles / committedInsts;
 
     smtCpi
         .name(name() + ".smt_cpi")
@@ -373,7 +477,7 @@ InOrderCPU::regStats()
         .name(name() + ".ipc")
         .desc("IPC: Instructions Per Cycle (Per-Thread)")
         .precision(6);
-    ipc =  committedInsts / threadCycles;
+    ipc =  committedInsts / numCycles;
 
     smtIpc
         .name(name() + ".smt_ipc")
@@ -398,18 +502,27 @@ InOrderCPU::tick()
 
     ++numCycles;
 
+    bool pipes_idle = true;
+    
     //Tick each of the stages
     for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
         pipelineStage[stNum]->tick();
+
+        pipes_idle = pipes_idle && pipelineStage[stNum]->idle;
     }
 
+    if (pipes_idle)
+        idleCycles++;
+    else
+        runCycles++;
+    
     // Now advance the time buffers one tick
     timeBuffer.advance();
     for (int sqNum=0; sqNum < NumStages - 1; sqNum++) {
         stageQueue[sqNum]->advance();
     }
     activityRec.advance();
-
+   
     // Any squashed requests, events, or insts then remove them now
     cleanUpRemovedReqs();
     cleanUpRemovedEvents();
@@ -428,7 +541,8 @@ InOrderCPU::tick()
             //Tick next_tick = curTick + cycles(1);
             //tickEvent.schedule(next_tick);
             mainEventQueue.schedule(&tickEvent, nextCycle(curTick + 1));
-            DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", nextCycle() + curTick);
+            DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", 
+                    nextCycle(curTick + 1));
         }
     }
 
@@ -446,35 +560,31 @@ InOrderCPU::init()
 
     // Set inSyscall so that the CPU doesn't squash when initially
     // setting up registers.
-    for (int i = 0; i < number_of_threads; ++i)
-        thread[i]->inSyscall = true;
+    for (ThreadID tid = 0; tid < numThreads; ++tid)
+        thread[tid]->inSyscall = true;
 
 #if FULL_SYSTEM
-    for (int tid=0; tid < number_of_threads; tid++) {
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
         ThreadContext *src_tc = threadContexts[tid];
         TheISA::initCPU(src_tc, src_tc->contextId());
     }
 #endif
 
     // Clear inSyscall.
-    for (int i = 0; i < number_of_threads; ++i)
-        thread[i]->inSyscall = false;
+    for (ThreadID tid = 0; tid < numThreads; ++tid)
+        thread[tid]->inSyscall = false;
 
     // Call Initializiation Routine for Resource Pool
     resPool->init();
 }
 
-void
-InOrderCPU::readFunctional(Addr addr, uint32_t &buffer)
-{
-    tcBase()->getMemPort()->readBlob(addr, (uint8_t*)&buffer, sizeof(uint32_t));
-    buffer = gtoh(buffer);
-}
-
 void
 InOrderCPU::reset()
 {
-  miscRegFile.reset(coreType, numThreads, numVirtProcs, dynamic_cast<BaseCPU*>(this));
+    for (int i = 0; i < numThreads; i++) {
+        isa[i].reset(coreType, numThreads,
+                     1/*numVirtProcs*/, dynamic_cast<BaseCPU*>(this));
+    }
 }
 
 Port*
@@ -483,247 +593,271 @@ InOrderCPU::getPort(const std::string &if_name, int idx)
     return resPool->getPort(if_name, idx);
 }
 
-void
-InOrderCPU::trap(Fault fault, unsigned tid, int delay)
+#if FULL_SYSTEM
+// Only compiled when FULL_SYSTEM is set. hwrei is not implemented for this
+// CPU model: every call aborts through panic().
+Fault
+InOrderCPU::hwrei(ThreadID tid)
 {
-    scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay);
+    panic("hwrei: Unimplemented");
+    
+    // Presumably only present to give the function a return value.
+    return NoFault;
 }
 
-void
-InOrderCPU::trapCPU(Fault fault, unsigned tid)
-{
-    fault->invoke(tcBase(tid));
-}
 
-void
-InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
-                           unsigned tid, unsigned vpe, unsigned delay)
+// Only compiled when FULL_SYSTEM is set. simPalCheck is not implemented
+// for this CPU model: every call aborts through panic().
+bool
+InOrderCPU::simPalCheck(int palFunc, ThreadID tid)
 {
-    CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe);
+    panic("simPalCheck: Unimplemented");
 
-    if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event Type #%s for cycle %i.\n",
-                eventNames[c_event], curTick + delay);
-        mainEventQueue.schedule(cpu_event,curTick + delay);
-    } else {
-        cpu_event->process();
-        cpuEventRemoveList.push(cpu_event);
-    }
+    // Presumably only present to give the function a return value.
+    return true;
+}
 
-    // Broadcast event to the Resource Pool
-    DynInstPtr dummy_inst = new InOrderDynInst(this, NULL, getNextEventNum(), tid);
-    resPool->scheduleEvent(c_event, dummy_inst, 0, 0, tid);
+
+// Query the interrupts object for an outstanding interrupt, always on
+// behalf of thread context 0, and return whatever fault it reports.
+Fault
+InOrderCPU::getInterrupts()
+{
+    ThreadContext *tc = threadContexts[0];
+    return interrupts->getInterrupt(tc);
 }
 
-inline bool
-InOrderCPU::isThreadActive(unsigned tid)
+
+// Handle a pending interrupt: update the interrupt bookkeeping for thread
+// context 0 and raise the interrupt as a trap on thread 0.
+// The caller must pass a real fault; NoFault trips the assertion.
+void
+InOrderCPU::processInterrupts(Fault interrupt)
 {
-  list<unsigned>::iterator isActive = std::find(
-        activeThreads.begin(), activeThreads.end(), tid);
+    // Check for interrupts here.  For now can copy the code that
+    // exists within isa_fullsys_traits.hh.  Also assume that thread 0
+    // is the one that handles the interrupts.
+    // @todo: Possibly consolidate the interrupt checking code.
+    // @todo: Allow other threads to handle interrupts.
 
-    return (isActive != activeThreads.end());
+    assert(interrupt != NoFault);
+    this->interrupts->updateIntrInfo(this->threadContexts[0]);
+
+    DPRINTF(InOrderCPU, "Interrupt %s being handled\n", interrupt->name());
+    // Thread 0 takes the trap (see the assumption above).
+    this->trap(interrupt, 0);
 }
 
 
 void
-InOrderCPU::activateThread(unsigned tid)
+InOrderCPU::updateMemPorts()
 {
-    if (!isThreadActive(tid)) {
-        DPRINTF(InOrderCPU, "Adding Thread %i to active threads list in CPU.\n",
-                tid);
-        activeThreads.push_back(tid);
-
-        wakeCPU();
-    }
+    // Update all ThreadContext's memory ports (Functional/Virtual
+    // Ports)
+    ThreadID size = thread.size();
+    for (ThreadID i = 0; i < size; ++i)
+        thread[i]->connectMemPorts(thread[i]->getTC());
 }
+#endif
 
 void
-InOrderCPU::deactivateThread(unsigned tid)
+InOrderCPU::trap(Fault fault, ThreadID tid, int delay)
 {
-    DPRINTF(InOrderCPU, "[tid:%i]: Calling deactivate thread.\n", tid);
+    // Schedule a Trap CPU event carrying 'fault' for thread 'tid', using
+    // that thread's dummy instruction as the event's instruction.
+    // NOTE(review): 'delay' is a signed int here but scheduleCpuEvent()
+    // takes an unsigned delay, so a negative value would wrap -- confirm
+    // callers never pass one.
+    // NOTE(review): the marker below presumably records that the pipeline
+    // still has to be squashed on a trap -- confirm.
+    //@ Squash Pipeline during TRAP
+    scheduleCpuEvent(Trap, fault, tid, dummyInst[tid], delay);
+}
 
-    if (isThreadActive(tid)) {
-        DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n",
-                tid);
-        list<unsigned>::iterator thread_it = std::find(activeThreads.begin(),
-                                                 activeThreads.end(), tid);
+// Invoke 'fault' on the thread context of thread 'tid'. Called from
+// outside the part of the file visible here.
+void
+InOrderCPU::trapCPU(Fault fault, ThreadID tid)
+{
+    fault->invoke(tcBase(tid));
+}
 
-        removePipelineStalls(*thread_it);
+// Schedule a SquashFromMemStall CPU event for thread 'tid' after 'delay'.
+// When the event fires it squashes the pipeline using the squashing stage
+// and sequence number recorded in 'inst'.
+void 
+InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay)
+{
+    scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay);
+}
 
-        //@TODO: change stage status' to Idle?
 
-        activeThreads.erase(thread_it);
+// Squash the pipeline stages from stage_num back down to stage 0 for
+// thread tid after a memory stall.
+void
+InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num,
+                                ThreadID tid)
+{
+    DPRINTF(InOrderCPU, "Squashing Pipeline Stages Due to Memory Stall...\n");
+
+    // The instruction that caused the stall is squashed along with
+    // everything else (hence seq_num - 1) so that the stage bandwidth is
+    // cleared. That instruction has already been placed in an
+    // intermediate buffer while its stall is being handled.
+    InstSeqNum squash_point = seq_num - 1;
+
+    for (int stage = stage_num; stage >= 0; --stage) {
+        pipelineStage[stage]->squashDueToMemStall(squash_point, tid);
     }
 }
 
 void
-InOrderCPU::removePipelineStalls(unsigned tid)
+InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
+                             ThreadID tid, DynInstPtr inst, 
+                             unsigned delay, unsigned event_pri_offset)
 {
-    DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n",
-            tid);
+    CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst,
+                                       event_pri_offset);
 
-    for (int stNum = 0; stNum < NumStages ; stNum++) {
-        pipelineStage[stNum]->removeStalls(tid);
+    Tick sked_tick = nextCycle(curTick + ticks(delay));
+    if (delay >= 0) {
+        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
+                eventNames[c_event], curTick + delay, tid);
+        mainEventQueue.schedule(cpu_event, sked_tick);
+    } else {
+        cpu_event->process();
+        cpuEventRemoveList.push(cpu_event);
     }
 
+    // Broadcast event to the Resource Pool
+    // Need to reset tid just in case this is a dummy instruction
+    inst->setTid(tid);        
+    resPool->scheduleEvent(c_event, inst, 0, 0, tid);
 }
+
 bool
-InOrderCPU::isThreadInCPU(unsigned tid)
+InOrderCPU::isThreadActive(ThreadID tid)
 {
-  list<unsigned>::iterator isCurrent = std::find(
-        currentThreads.begin(), currentThreads.end(), tid);
+  list<ThreadID>::iterator isActive =
+      std::find(activeThreads.begin(), activeThreads.end(), tid);
 
-    return (isCurrent != currentThreads.end());
+    return (isActive != activeThreads.end());
 }
 
-void
-InOrderCPU::addToCurrentThreads(unsigned tid)
+bool
+InOrderCPU::isThreadReady(ThreadID tid)
 {
-    if (!isThreadInCPU(tid)) {
-        DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU.\n",
-                tid);
-        currentThreads.push_back(tid);
-    }
-}
+    // True when tid appears on the ready threads list.
+    bool on_list =
+        std::find(readyThreads.begin(), readyThreads.end(), tid) !=
+        readyThreads.end();
 
-void
-InOrderCPU::removeFromCurrentThreads(unsigned tid)
-{
-    if (isThreadInCPU(tid)) {
-        DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU.\n",
-                tid);
-        list<unsigned>::iterator isCurrent = std::find(
-            currentThreads.begin(), currentThreads.end(), tid);
-        currentThreads.erase(isCurrent);
-    }
+    return on_list;
 }
 
 bool
-InOrderCPU::isThreadSuspended(unsigned tid)
+InOrderCPU::isThreadSuspended(ThreadID tid)
 {
-  list<unsigned>::iterator isSuspended = std::find(
-        suspendedThreads.begin(), suspendedThreads.end(), tid);
+  list<ThreadID>::iterator isSuspended =
+      std::find(suspendedThreads.begin(), suspendedThreads.end(), tid);
 
-    return (isSuspended!= suspendedThreads.end());
+    return (isSuspended != suspendedThreads.end());
 }
 
 void
-InOrderCPU::enableVirtProcElement(unsigned vpe)
-{
-    DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling  "
-            "Enabling of concurrent virtual processor execution",
-            vpe);
-
-    scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, vpe);
+InOrderCPU::activateNextReadyThread()
+{
+    if (readyThreads.size() >= 1) {          
+        ThreadID ready_tid = readyThreads.front();
+        
+        // Activate in Pipeline
+        activateThread(ready_tid);                        
+        
+        // Activate in Resource Pool
+        resPool->activateAll(ready_tid);
+        
+        list<ThreadID>::iterator ready_it =
+            std::find(readyThreads.begin(), readyThreads.end(), ready_tid);
+        readyThreads.erase(ready_it);                        
+    } else {
+        DPRINTF(InOrderCPU,
+                "Attempting to activate new thread, but No Ready Threads to"
+                "activate.\n");
+        DPRINTF(InOrderCPU,
+                "Unable to switch to next active thread.\n");
+    }        
 }
 
 void
-InOrderCPU::enableVPEs(unsigned vpe)
+InOrderCPU::activateThread(ThreadID tid)
 {
-    DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Concurrent Execution "
-            "virtual processors %i", vpe);
+    // Activate thread tid. A suspended thread is first taken off the
+    // suspended list. Then either the thread is parked on the ready list
+    // (switch-on-cache-miss model with one thread already active) or, if
+    // it is not yet active, it is added to the active list and started in
+    // the pipeline.
+    if (isThreadSuspended(tid)) {
+        DPRINTF(InOrderCPU,
+                "Removing [tid:%i] from suspended threads list.\n", tid);
+
+        list<ThreadID>::iterator susp_it =
+            std::find(suspendedThreads.begin(), suspendedThreads.end(), 
+                      tid);
+        suspendedThreads.erase(susp_it);                        
+    }
 
-    list<unsigned>::iterator thread_it = currentThreads.begin();
+    // NOTE(review): this branch does not check whether tid is already on
+    // the ready list or is itself the running thread -- confirm that
+    // callers cannot trigger either case.
+    if (threadModel == SwitchOnCacheMiss &&
+        numActiveThreads() == 1) {
+        DPRINTF(InOrderCPU,
+                "Ignoring activation of [tid:%i], since [tid:%i] is "
+                "already running.\n", tid, activeThreadId());
+        
+        DPRINTF(InOrderCPU,"Placing [tid:%i] on ready threads list\n", 
+                tid);        
+
+        readyThreads.push_back(tid);
+        
+    } else if (!isThreadActive(tid)) {                
+        DPRINTF(InOrderCPU,
+                "Adding [tid:%i] to active threads list.\n", tid);
+        activeThreads.push_back(tid);
+        
+        activateThreadInPipeline(tid);
 
-    while (thread_it != currentThreads.end()) {
-        if (!isThreadSuspended(*thread_it)) {
-            activateThread(*thread_it);
-        }
-        thread_it++;
+        // Record the activation time and mark the context Active.
+        thread[tid]->lastActivate = curTick;            
+
+        tcBase(tid)->setStatus(ThreadContext::Active);    
+
+        wakeCPU();
+
+        // Each new activation is counted as a context switch.
+        numCtxtSwitches++;        
     }
 }
 
 void
-InOrderCPU::disableVirtProcElement(unsigned tid, unsigned vpe)
+InOrderCPU::activateThreadInPipeline(ThreadID tid)
 {
-    DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling  "
-            "Disabling of concurrent virtual processor execution",
-            vpe);
-
-    scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, vpe);
+    for (int stNum=0; stNum < NumStages; stNum++) {
+        pipelineStage[stNum]->activateThread(tid);
+    }    
 }
 
 void
-InOrderCPU::disableVPEs(unsigned tid, unsigned vpe)
+InOrderCPU::deactivateContext(ThreadID tid, int delay)
 {
+    // Request deactivation of thread tid: a DeactivateThread CPU event is
+    // scheduled 'delay' cycles out, and the CPU is marked as running.
-    DPRINTF(InOrderCPU, "[vpe:%i]: Disabling Concurrent Execution of "
-            "virtual processors %i", vpe);
 
-    unsigned base_vpe = TheISA::getVirtProcNum(tcBase(tid));
+    DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid);
 
-    list<unsigned>::iterator thread_it = activeThreads.begin();
+    scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst[tid], delay);
 
-    std::vector<list<unsigned>::iterator> removeList;
-
-    while (thread_it != activeThreads.end()) {
-        if (base_vpe != vpe) {
-            removeList.push_back(thread_it);
-        }
-        thread_it++;
-    }
+    // Be sure to signal that there's some activity so the CPU doesn't
+    // deschedule itself.
+    activityRec.activity();
 
-    for (int i = 0; i < removeList.size(); i++) {
-        activeThreads.erase(removeList[i]);
-    }
+    _status = Running;
 }
 
 void
-InOrderCPU::enableMultiThreading(unsigned vpe)
+InOrderCPU::deactivateThread(ThreadID tid)
 {
-    // Schedule event to take place at end of cycle
-    DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on "
-            "virtual processor %i", vpe);
+    // Take thread tid off the active threads list (if present), clear its
+    // pipeline stalls and mark its context Suspended. On return the thread
+    // is guaranteed not to be on the active list.
+    DPRINTF(InOrderCPU, "[tid:%i]: Calling deactivate thread.\n", tid);
 
-    scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, vpe);
-}
+    if (isThreadActive(tid)) {
+        DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n",
+                tid);
+        list<ThreadID>::iterator thread_it =
+            std::find(activeThreads.begin(), activeThreads.end(), tid);
 
-void
-InOrderCPU::enableThreads(unsigned vpe)
-{
-    DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Multithreading on "
-            "virtual processor %i", vpe);
+        // Stalls are cleared before the list entry is erased, because the
+        // iterator is dereferenced here.
+        removePipelineStalls(*thread_it);
 
-    list<unsigned>::iterator thread_it = currentThreads.begin();
+        activeThreads.erase(thread_it);
 
-    while (thread_it != currentThreads.end()) {
-        if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) {
-            if (!isThreadSuspended(*thread_it)) {
-                activateThread(*thread_it);
-            }
-        }
-        thread_it++;
+        // Ideally, this should be triggered from the
+        // suspendContext/Thread functions
+        tcBase(tid)->setStatus(ThreadContext::Suspended);    
     }
-}
-void
-InOrderCPU::disableMultiThreading(unsigned tid, unsigned vpe)
-{
-    // Schedule event to take place at end of cycle
-   DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on "
-            "virtual processor %i", tid, vpe);
 
-    scheduleCpuEvent(DisableThreads, NoFault, tid, vpe);
+    assert(!isThreadActive(tid));    
 }
 
 void
-InOrderCPU::disableThreads(unsigned tid, unsigned vpe)
+InOrderCPU::removePipelineStalls(ThreadID tid)
 {
-    DPRINTF(InOrderCPU, "[tid:%i]: Disabling Multithreading on "
-            "virtual processor %i", tid, vpe);
-
-    list<unsigned>::iterator thread_it = activeThreads.begin();
-
-    std::vector<list<unsigned>::iterator> removeList;
+    DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n",
+            tid);
 
-    while (thread_it != activeThreads.end()) {
-        if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) {
-            removeList.push_back(thread_it);
-        }
-        thread_it++;
+    for (int stNum = 0; stNum < NumStages ; stNum++) {
+        pipelineStage[stNum]->removeStalls(tid);
     }
 
-    for (int i = 0; i < removeList.size(); i++) {
-        activeThreads.erase(removeList[i]);
-    }
 }
 
 void
@@ -733,8 +867,8 @@ InOrderCPU::updateThreadPriority()
     {
         //DEFAULT TO ROUND ROBIN SCHEME
         //e.g. Move highest priority to end of thread list
-        list<unsigned>::iterator list_begin = activeThreads.begin();
-        list<unsigned>::iterator list_end   = activeThreads.end();
+        list<ThreadID>::iterator list_begin = activeThreads.begin();
+        list<ThreadID>::iterator list_end   = activeThreads.end();
 
         unsigned high_thread = *list_begin;
 
@@ -748,7 +882,7 @@ inline void
 InOrderCPU::tickThreadStats()
 {
     /** Keep track of cycles that each thread is active */
-    list<unsigned>::iterator thread_it = activeThreads.begin();
+    list<ThreadID>::iterator thread_it = activeThreads.begin();
     while (thread_it != activeThreads.end()) {
         threadCycles[*thread_it]++;
         thread_it++;
@@ -761,11 +895,12 @@ InOrderCPU::tickThreadStats()
 }
 
 void
-InOrderCPU::activateContext(unsigned tid, int delay)
+InOrderCPU::activateContext(ThreadID tid, int delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid);
 
-    scheduleCpuEvent(ActivateThread, NoFault, tid, 0/*vpe*/, delay);
+    
+    scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst[tid], delay);
 
     // Be sure to signal that there's some activity so the CPU doesn't
     // deschedule itself.
@@ -774,71 +909,74 @@ InOrderCPU::activateContext(unsigned tid, int delay)
     _status = Running;
 }
 
-
 void
-InOrderCPU::suspendContext(unsigned tid, int delay)
+InOrderCPU::activateNextReadyContext(int delay)
 {
-    scheduleCpuEvent(SuspendThread, NoFault, tid, 0/*vpe*/, delay);
-    //_status = Idle;
-}
+    DPRINTF(InOrderCPU,"Activating next ready thread\n");
 
-void
-InOrderCPU::suspendThread(unsigned tid)
-{
-    DPRINTF(InOrderCPU,"[tid: %i]: Suspended ...\n", tid);
-    deactivateThread(tid);
-}
+    // NOTE: Add 5 to the event priority so that we always activate
+    // threads after we've finished deactivating, squashing, etc.
+    // other threads
+    scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], 
+                     delay, 5);
 
-void
-InOrderCPU::deallocateContext(unsigned tid, int delay)
-{
-    scheduleCpuEvent(DeallocateThread, NoFault, tid, 0/*vpe*/, delay);
+    // Be sure to signal that there's some activity so the CPU doesn't
+    // deschedule itself.
+    activityRec.activity();
+
+    _status = Running;
 }
 
 void
-InOrderCPU::deallocateThread(unsigned tid)
+InOrderCPU::haltContext(ThreadID tid, int delay)
 {
-    DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...", tid);
-
-    removeFromCurrentThreads(tid);
+    DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid);
 
-    deactivateThread(tid);
+    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay);
 
-    squashThreadInPipeline(tid);
+    activityRec.activity();
 }
 
 void
-InOrderCPU::squashThreadInPipeline(unsigned tid)
+InOrderCPU::haltThread(ThreadID tid)
 {
-    //Squash all instructions in each stage
-    for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
-        pipelineStage[stNum]->squash(0 /*seq_num*/, tid);
+    DPRINTF(InOrderCPU, "[tid:%i]: Placing on Halted Threads List...\n", tid);
+    deactivateThread(tid);
+    squashThreadInPipeline(tid);   
+    haltedThreads.push_back(tid);    
+
+    tcBase(tid)->setStatus(ThreadContext::Halted);    
+
+    if (threadModel == SwitchOnCacheMiss) {        
+        activateNextReadyContext();    
     }
 }
 
 void
-InOrderCPU::haltContext(unsigned tid, int delay)
+InOrderCPU::suspendContext(ThreadID tid, int delay)
 {
-    DPRINTF(InOrderCPU, "[tid:%i]: Halt context called.\n", tid);
-
-    // Halt is same thing as deallocate for now
-    // @TODO: Differentiate between halt & deallocate in the CPU
-    // model
-    deallocateContext(tid, delay);
+    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay);
 }
 
 void
-InOrderCPU::insertThread(unsigned tid)
+InOrderCPU::suspendThread(ThreadID tid)
 {
-    panic("Unimplemented Function\n.");
+    DPRINTF(InOrderCPU, "[tid:%i]: Placing on Suspended Threads List...\n",
+            tid);
+    deactivateThread(tid);
+    suspendedThreads.push_back(tid);    
+    thread[tid]->lastSuspend = curTick;    
+
+    tcBase(tid)->setStatus(ThreadContext::Suspended);    
 }
 
 void
-InOrderCPU::removeThread(unsigned tid)
+InOrderCPU::squashThreadInPipeline(ThreadID tid)
 {
-    DPRINTF(InOrderCPU, "Removing Thread %i from CPU.\n", tid);
-
-    /** Broadcast to CPU resources*/
+    //Squash all instructions in each stage
+    for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
+        pipelineStage[stNum]->squash(0 /*seq_num*/, tid);
+    }
 }
 
 PipelineStage*
@@ -847,105 +985,98 @@ InOrderCPU::getPipeStage(int stage_num)
     return pipelineStage[stage_num];
 }
 
-
-void
-InOrderCPU::activateWhenReady(int tid)
-{
-    panic("Unimplemented Function\n.");
-}
-
-
 uint64_t
-InOrderCPU::readPC(unsigned tid)
+InOrderCPU::readPC(ThreadID tid)
 {
     return PC[tid];
 }
 
 
 void
-InOrderCPU::setPC(Addr new_PC, unsigned tid)
+InOrderCPU::setPC(Addr new_PC, ThreadID tid)
 {
     PC[tid] = new_PC;
 }
 
 
 uint64_t
-InOrderCPU::readNextPC(unsigned tid)
+InOrderCPU::readNextPC(ThreadID tid)
 {
     return nextPC[tid];
 }
 
 
 void
-InOrderCPU::setNextPC(uint64_t new_NPC, unsigned tid)
+InOrderCPU::setNextPC(uint64_t new_NPC, ThreadID tid)
 {
     nextPC[tid] = new_NPC;
 }
 
 
 uint64_t
-InOrderCPU::readNextNPC(unsigned tid)
+InOrderCPU::readNextNPC(ThreadID tid)
 {
     return nextNPC[tid];
 }
 
 
 void
-InOrderCPU::setNextNPC(uint64_t new_NNPC, unsigned tid)
+InOrderCPU::setNextNPC(uint64_t new_NNPC, ThreadID tid)
 {
     nextNPC[tid] = new_NNPC;
 }
 
 uint64_t
-InOrderCPU::readIntReg(int reg_idx, unsigned tid)
+InOrderCPU::readIntReg(int reg_idx, ThreadID tid)
 {
-    return intRegFile[tid].readReg(reg_idx);
+    return intRegs[tid][reg_idx];
 }
 
 FloatReg
-InOrderCPU::readFloatReg(int reg_idx, unsigned tid, int width)
+InOrderCPU::readFloatReg(int reg_idx, ThreadID tid)
 {
-
-    return floatRegFile[tid].readReg(reg_idx, width);
+    return floatRegs.f[tid][reg_idx];
 }
 
 FloatRegBits
-InOrderCPU::readFloatRegBits(int reg_idx, unsigned tid, int width)
+InOrderCPU::readFloatRegBits(int reg_idx, ThreadID tid)
 {;
-    return floatRegFile[tid].readRegBits(reg_idx, width);
+    return floatRegs.i[tid][reg_idx];
 }
 
 void
-InOrderCPU::setIntReg(int reg_idx, uint64_t val, unsigned tid)
+InOrderCPU::setIntReg(int reg_idx, uint64_t val, ThreadID tid)
 {
-    intRegFile[tid].setReg(reg_idx, val);
+    intRegs[tid][reg_idx] = val;
 }
 
 
 void
-InOrderCPU::setFloatReg(int reg_idx, FloatReg val, unsigned tid, int width)
+InOrderCPU::setFloatReg(int reg_idx, FloatReg val, ThreadID tid)
 {
-    floatRegFile[tid].setReg(reg_idx, val, width);
+    floatRegs.f[tid][reg_idx] = val;
 }
 
 
 void
-InOrderCPU::setFloatRegBits(int reg_idx, FloatRegBits val, unsigned tid, int width)
+InOrderCPU::setFloatRegBits(int reg_idx, FloatRegBits val, ThreadID tid)
 {
-    floatRegFile[tid].setRegBits(reg_idx, val, width);
+    floatRegs.i[tid][reg_idx] = val;
 }
 
 uint64_t
-InOrderCPU::readRegOtherThread(unsigned reg_idx, unsigned tid)
+InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid)
 {
     // If Default value is set, then retrieve target thread
-    if (tid == -1) {
+    if (tid == InvalidThreadID) {
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {                   // Integer Register File
+    if (reg_idx < FP_Base_DepTag) {                   
+        // Integer Register File
         return readIntReg(reg_idx, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {          // Float Register File
+    } else if (reg_idx < Ctrl_Base_DepTag) {          
+        // Float Register File
         reg_idx -= FP_Base_DepTag;
         return readFloatRegBits(reg_idx, tid);
     } else {
@@ -954,10 +1085,11 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, unsigned tid)
     }
 }
 void
-InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val, unsigned tid)
+InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
+                              ThreadID tid)
 {
     // If Default value is set, then retrieve target thread
-    if (tid == -1) {
+    if (tid == InvalidThreadID) {
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
@@ -973,50 +1105,69 @@ InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val, unsigned tid
 }
 
 MiscReg
-InOrderCPU::readMiscRegNoEffect(int misc_reg, unsigned tid)
+InOrderCPU::readMiscRegNoEffect(int misc_reg, ThreadID tid)
 {
-    return miscRegFile.readRegNoEffect(misc_reg, tid);
+    return isa[tid].readMiscRegNoEffect(misc_reg);
 }
 
 MiscReg
-InOrderCPU::readMiscReg(int misc_reg, unsigned tid)
+InOrderCPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    return miscRegFile.readReg(misc_reg, tcBase(tid), tid);
+    return isa[tid].readMiscReg(misc_reg, tcBase(tid));
 }
 
 void
-InOrderCPU::setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned tid)
+InOrderCPU::setMiscRegNoEffect(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    miscRegFile.setRegNoEffect(misc_reg, val, tid);
+    isa[tid].setMiscRegNoEffect(misc_reg, val);
 }
 
 void
-InOrderCPU::setMiscReg(int misc_reg, const MiscReg &val, unsigned tid)
+InOrderCPU::setMiscReg(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    miscRegFile.setReg(misc_reg, val, tcBase(tid), tid);
+    isa[tid].setMiscReg(misc_reg, val, tcBase(tid));
 }
 
 
 InOrderCPU::ListIt
 InOrderCPU::addInst(DynInstPtr &inst)
 {
-    int tid = inst->readTid();
+    ThreadID tid = inst->readTid();
 
     instList[tid].push_back(inst);
 
     return --(instList[tid].end());
 }
 
+void 
+InOrderCPU::updateContextSwitchStats()
+{
+    // Set Average Stat Here, then reset to 0    
+    instsPerCtxtSwitch = instsPerSwitch;
+    instsPerSwitch = 0;
+}
+
+    
 void
-InOrderCPU::instDone(DynInstPtr inst, unsigned tid)
+InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
 {
-    // Set the CPU's PCs - This contributes to the precise state of the CPU which can be used
-    // when restoring a thread to the CPU after a fork or after an exception
-    // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if it's a branch or not
+    // Set the CPU's PCs - This contributes to the precise state of the CPU 
+    // which can be used when restoring a thread to the CPU after any
+    // type of context switching activity (fork, exception, etc.)
     setPC(inst->readPC(), tid);
     setNextPC(inst->readNextPC(), tid);
     setNextNPC(inst->readNextNPC(), tid);
 
+    if (inst->isControl()) {
+        thread[tid]->lastGradIsBranch = true;
+        thread[tid]->lastBranchPC = inst->readPC();
+        thread[tid]->lastBranchNextPC = inst->readNextPC();
+        thread[tid]->lastBranchNextNPC = inst->readNextNPC();        
+    } else {
+        thread[tid]->lastGradIsBranch = false;
+    }
+        
+
     // Finalize Trace Data For Instruction
     if (inst->traceData) {
         //inst->traceData->setCycle(curTick);
@@ -1027,9 +1178,9 @@ InOrderCPU::instDone(DynInstPtr inst, unsigned tid)
         inst->traceData = NULL;
     }
 
-    // Set Last Graduated Instruction In Thread State
-    //thread[tid]->lastGradInst = inst;
-
+    // Increment active thread's instruction count
+    instsPerSwitch++;
+    
     // Increment thread-state's instruction count
     thread[tid]->numInst++;
 
@@ -1047,23 +1198,53 @@ InOrderCPU::instDone(DynInstPtr inst, unsigned tid)
         smtCommittedInsts[tid]++;
     }
 
+    // Instruction-Mix Stats
+    if (inst->isLoad()) {
+        comLoads++;
+    } else if (inst->isStore()) {
+        comStores++;
+    } else if (inst->isControl()) {
+        comBranches++;
+    } else if (inst->isNop()) {
+        comNops++;
+    } else if (inst->isNonSpeculative()) {
+        comNonSpec++;
+    } else if (inst->isInteger()) {
+        comInts++;
+    } else if (inst->isFloating()) {
+        comFloats++;
+    }
+
     // Check for instruction-count-based events.
     comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
 
     // Broadcast to other resources an instruction
     // has been completed
-    resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, tid);
+    resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, 
+                           0, 0, tid);
 
     // Finally, remove instruction from CPU
     removeInst(inst);
 }
 
+// Currently unused; the intent is to replace the repeated remove-list code
+// (see removeInst() and squashInstIt()) with a call to this function.
 void
 InOrderCPU::addToRemoveList(DynInstPtr &inst)
 {
     removeInstsThisCycle = true;
-
-    removeList.push(inst->getInstListIt());
+    if (!inst->isRemoveList()) {            
+        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+                "[sn:%lli] to remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+        inst->setRemoveList();        
+        removeList.push(inst->getInstListIt());
+    }  else {
+        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+                "[sn:%lli], already remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+    }
+    
 }
 
 void
@@ -1076,12 +1257,22 @@ InOrderCPU::removeInst(DynInstPtr &inst)
     removeInstsThisCycle = true;
 
     // Remove the instruction.
-    removeList.push(inst->getInstListIt());
+    if (!inst->isRemoveList()) {            
+        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+                "[sn:%lli] to remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+        inst->setRemoveList();        
+        removeList.push(inst->getInstListIt());
+    } else {
+        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+                "[sn:%lli], already on remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+    }
+
 }
 
 void
-InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num,
-                                  unsigned tid)
+InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
 {
     //assert(!instList[tid].empty());
 
@@ -1091,7 +1282,7 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num,
 
     inst_iter--;
 
-    DPRINTF(InOrderCPU, "Deleting instructions from CPU instruction "
+    DPRINTF(InOrderCPU, "Squashing instructions from CPU instruction "
             "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
             tid, seq_num, (*inst_iter)->seqNum);
 
@@ -1110,7 +1301,7 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num,
 
 
 inline void
-InOrderCPU::squashInstIt(const ListIt &instIt, const unsigned &tid)
+InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid)
 {
     if ((*instIt)->threadNumber == tid) {
         DPRINTF(InOrderCPU, "Squashing instruction, "
@@ -1121,8 +1312,22 @@ InOrderCPU::squashInstIt(const ListIt &instIt, const unsigned &tid)
 
         (*instIt)->setSquashed();
 
-        removeList.push(instIt);
+        if (!(*instIt)->isRemoveList()) {            
+            DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+                    "[sn:%lli] to remove list\n",
+                    (*instIt)->threadNumber, (*instIt)->readPC(), 
+                    (*instIt)->seqNum);
+            (*instIt)->setRemoveList();        
+            removeList.push(instIt);
+        } else {
+            DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i]"
+                    " PC %#x [sn:%lli], already on remove list\n",
+                    (*instIt)->threadNumber, (*instIt)->readPC(), 
+                    (*instIt)->seqNum);
+        }
+    
     }
+    
 }
 
 
@@ -1134,15 +1339,15 @@ InOrderCPU::cleanUpRemovedInsts()
                 "[tid:%i] [sn:%lli] PC %#x\n",
                 (*removeList.front())->threadNumber,
                 (*removeList.front())->seqNum,
-                (*removeList.front())->readPC());
+               (*removeList.front())->readPC());
 
         DynInstPtr inst = *removeList.front();
-        int tid = inst->threadNumber;
+        ThreadID tid = inst->threadNumber;
 
         // Make Sure Resource Schedule Is Emptied Out
         ThePipeline::ResSchedule *inst_sched = &inst->resSched;
         while (!inst_sched->empty()) {
-            ThePipeline::ScheduleEntry* sch_entry = inst_sched->top();
+            ScheduleEntry* sch_entry = inst_sched->top();
             inst_sched->pop();
             delete sch_entry;
         }
@@ -1162,11 +1367,6 @@ InOrderCPU::cleanUpRemovedInsts()
         instList[tid].erase(removeList.front());
 
         removeList.pop();
-
-        DPRINTF(RefCount, "pop from remove list: [sn:%i]: Refcount = %i.\n",
-                inst->seqNum,
-                0/*inst->curCount()*/);
-
     }
 
     removeInstsThisCycle = false;
@@ -1178,22 +1378,19 @@ InOrderCPU::cleanUpRemovedReqs()
     while (!reqRemoveList.empty()) {
         ResourceRequest *res_req = reqRemoveList.front();
 
-        DPRINTF(RefCount, "[tid:%i]: Removing Request, "
-                "[sn:%lli] [slot:%i] [stage_num:%i] [res:%s] [refcount:%i].\n",
+        DPRINTF(RefCount, "[tid:%i] [sn:%lli]: Removing Request "
+                "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n",
                 res_req->inst->threadNumber,
                 res_req->inst->seqNum,
-                res_req->getSlot(),
                 res_req->getStageNum(),
                 res_req->res->name(),
-                0/*res_req->inst->curCount()*/);
+                (res_req->isCompleted()) ?
+                res_req->getComplSlot() : res_req->getSlot(),
+                res_req->isCompleted());
 
         reqRemoveList.pop();
 
         delete res_req;
-
-        DPRINTF(RefCount, "after remove request: [sn:%i]: Refcount = %i.\n",
-                res_req->inst->seqNum,
-                0/*res_req->inst->curCount()*/);
     }
 }
 
@@ -1238,14 +1435,36 @@ InOrderCPU::wakeCPU()
 
     DPRINTF(Activity, "Waking up CPU\n");
 
-    //@todo: figure out how to count idleCycles correctly
-    //idleCycles += (curTick - 1) - lastRunningCycle;
+    Tick extra_cycles = tickToCycles((curTick - 1) - lastRunningCycle);
+
+    idleCycles += extra_cycles;    
+    for (int stage_num = 0; stage_num < NumStages; stage_num++) {
+        pipelineStage[stage_num]->idleCycles += extra_cycles;
+    }    
 
-    mainEventQueue.schedule(&tickEvent, curTick);
+    numCycles += extra_cycles;
+
+    mainEventQueue.schedule(&tickEvent, nextCycle(curTick));
 }
 
+#if FULL_SYSTEM
+
 void
-InOrderCPU::syscall(int64_t callnum, int tid)
+InOrderCPU::wakeup()
+{
+    if (this->thread[0]->status() != ThreadContext::Suspended)
+        return;
+
+    this->wakeCPU();
+
+    DPRINTF(Quiesce, "Suspended Processor woken\n");
+    this->threadContexts[0]->activate();
+}
+#endif
+
+#if !FULL_SYSTEM
+void
+InOrderCPU::syscall(int64_t callnum, ThreadID tid)
 {
     DPRINTF(InOrderCPU, "[tid:%i] Executing syscall().\n\n", tid);
 
@@ -1265,20 +1484,7 @@ InOrderCPU::syscall(int64_t callnum, int tid)
     // Clear Non-Speculative Block Variable
     nonSpecInstActive[tid] = false;
 }
-
-Fault
-InOrderCPU::read(DynInstPtr inst)
-{
-    Resource *mem_res = resPool->getResource(dataPortIdx);
-    return mem_res->doDataAccess(inst);
-}
-
-Fault
-InOrderCPU::write(DynInstPtr inst, uint64_t *res)
-{
-    Resource *mem_res = resPool->getResource(dataPortIdx);
-    return mem_res->doDataAccess(inst, res);
-}
+#endif
 
 void
 InOrderCPU::prefetch(DynInstPtr inst)
@@ -1298,7 +1504,8 @@ InOrderCPU::writeHint(DynInstPtr inst)
 TheISA::TLB*
 InOrderCPU::getITBPtr()
 {
-    TLBUnit *itb_res = dynamic_cast<TLBUnit*>(resPool->getResource(itbIdx));
+    CacheUnit *itb_res =
+        dynamic_cast<CacheUnit*>(resPool->getResource(fetchPortIdx));
     return itb_res->tlb();
 }
 
@@ -1306,6 +1513,30 @@ InOrderCPU::getITBPtr()
 TheISA::TLB*
 InOrderCPU::getDTBPtr()
 {
-    TLBUnit *dtb_res = dynamic_cast<TLBUnit*>(resPool->getResource(dtbIdx));
+    CacheUnit *dtb_res =
+        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
     return dtb_res->tlb();
 }
+
+Fault
+InOrderCPU::read(DynInstPtr inst, Addr addr,
+                 uint8_t *data, unsigned size, unsigned flags)
+{
+    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
+    //       you want to run w/out caches?
+    CacheUnit *cache_res = 
+        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
+
+    return cache_res->read(inst, addr, data, size, flags);
+}
+
+Fault
+InOrderCPU::write(DynInstPtr inst, uint8_t *data, unsigned size,
+                  Addr addr, unsigned flags, uint64_t *write_res)
+{
+    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
+    //       you want to run w/out caches?
+    CacheUnit *cache_res =
+        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
+    return cache_res->write(inst, data, size, addr, flags, write_res);
+}