arch, cpu: Add support for flattening misc register indexes.
[gem5.git] / src / cpu / inorder / cpu.cc
index d36867439f4d830f83ae518d3f9d99c0168e62f0..5a02f94d9bcc969b8ca0fcf80ab666860596aea8 100644 (file)
@@ -1,4 +1,17 @@
 /*
+ * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2007 MIPS Technologies, Inc.
  * All rights reserved.
  *
 #include <algorithm>
 
 #include "arch/utility.hh"
-#include "config/full_system.hh"
+#include "base/bigint.hh"
 #include "config/the_isa.hh"
-#include "cpu/activity.hh"
-#include "cpu/base.hh"
-#include "cpu/exetrace.hh"
+#include "cpu/inorder/resources/cache_unit.hh"
+#include "cpu/inorder/resources/resource_list.hh"
 #include "cpu/inorder/cpu.hh"
 #include "cpu/inorder/first_stage.hh"
 #include "cpu/inorder/inorder_dyn_inst.hh"
 #include "cpu/inorder/pipeline_traits.hh"
 #include "cpu/inorder/resource_pool.hh"
-#include "cpu/inorder/resources/resource_list.hh"
 #include "cpu/inorder/thread_context.hh"
 #include "cpu/inorder/thread_state.hh"
+#include "cpu/activity.hh"
+#include "cpu/base.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/quiesce_event.hh"
+#include "cpu/reg_class.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/thread_context.hh"
-#include "mem/translating_port.hh"
+#include "debug/Activity.hh"
+#include "debug/InOrderCPU.hh"
+#include "debug/InOrderCachePort.hh"
+#include "debug/Interrupt.hh"
+#include "debug/Quiesce.hh"
+#include "debug/RefCount.hh"
+#include "debug/SkedCache.hh"
 #include "params/InOrderCPU.hh"
+#include "sim/full_system.hh"
 #include "sim/process.hh"
 #include "sim/stat_control.hh"
-
-#if FULL_SYSTEM
-#include "cpu/quiesce_event.hh"
 #include "sim/system.hh"
-#endif
 
 #if THE_ISA == ALPHA_ISA
 #include "arch/alpha/osfpal.hh"
@@ -65,6 +84,30 @@ using namespace std;
 using namespace TheISA;
 using namespace ThePipeline;
 
+/**
+ * Construct a CachePort bound to the given cache unit.
+ *
+ * The port's name is the cache unit's name with @p name appended, and the
+ * cache unit's cpu member is handed to the MasterPort base alongside it.
+ *
+ * @param _cacheUnit Cache unit this port forwards responses and retries to.
+ * @param name       Suffix appended to the cache unit's name.
+ */
+InOrderCPU::CachePort::CachePort(CacheUnit *_cacheUnit,
+                                 const std::string& name) :
+    MasterPort(_cacheUnit->name() + name, _cacheUnit->cpu),
+    cacheUnit(_cacheUnit)
+{ }
+
+/**
+ * Handle a timing response arriving on this port.
+ *
+ * A packet that is not flagged as an error is handed to the cache unit for
+ * completion; an error packet is only reported through the
+ * InOrderCachePort debug flag.
+ *
+ * @param pkt Response packet.
+ * @return Always true.
+ */
+bool
+InOrderCPU::CachePort::recvTimingResp(Packet *pkt)
+{
+    if (!pkt->isError()) {
+        cacheUnit->processCacheCompletion(pkt);
+        return true;
+    }
+
+    DPRINTF(InOrderCachePort, "Got error packet back for address: %x\n",
+            pkt->getAddr());
+    return true;
+}
+
+/** Forward a retry notification on this port to the owning cache unit. */
+void
+InOrderCPU::CachePort::recvRetry()
+{
+    cacheUnit->recvRetry();
+}
+
 InOrderCPU::TickEvent::TickEvent(InOrderCPU *c)
   : Event(CPU_Tick_Pri), cpu(c)
 { }
@@ -78,16 +121,15 @@ InOrderCPU::TickEvent::process()
 
 
 const char *
-InOrderCPU::TickEvent::description()
+InOrderCPU::TickEvent::description() const
 {
     return "InOrderCPU tick event";
 }
 
 InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type,
                                Fault fault, ThreadID _tid, DynInstPtr inst,
-                               unsigned event_pri_offset)
-    : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)),
-      cpu(_cpu)
+                               CPUEventPri event_pri)
+    : Event(event_pri), cpu(_cpu)
 {
     setEvent(e_type, fault, _tid, inst);
 }
@@ -101,7 +143,7 @@ std::string InOrderCPU::eventNames[NumCPUEvents] =
     "HaltThread",
     "SuspendThread",
     "Trap",
-    "InstGraduated",
+    "Syscall",
     "SquashFromMemStall",
     "UpdatePCs"
 };
@@ -113,6 +155,7 @@ InOrderCPU::CPUEvent::process()
     {
       case ActivateThread:
         cpu->activateThread(tid);
+        cpu->resPool->activateThread(tid);
         break;
 
       case ActivateNextReadyThread:
@@ -121,22 +164,35 @@ InOrderCPU::CPUEvent::process()
 
       case DeactivateThread:
         cpu->deactivateThread(tid);
+        cpu->resPool->deactivateThread(tid);
         break;
 
       case HaltThread:
         cpu->haltThread(tid);
+        cpu->resPool->deactivateThread(tid);
         break;
 
       case SuspendThread: 
         cpu->suspendThread(tid);
+        cpu->resPool->suspendThread(tid);
         break;
 
       case SquashFromMemStall:
         cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid);
+        cpu->resPool->squashDueToMemStall(inst, inst->squashingStage,
+                                          inst->seqNum, tid);
         break;
 
       case Trap:
-        cpu->trapCPU(fault, tid);
+        DPRINTF(InOrderCPU, "Trapping CPU\n");
+        cpu->trap(fault, tid, inst);
+        cpu->resPool->trap(fault, tid, inst);
+        cpu->trapPending[tid] = false;
+        break;
+
+      case Syscall:
+        cpu->syscall(inst->syscallNum, tid);
+        cpu->resPool->trap(fault, tid, inst);
         break;
 
       default:
@@ -149,20 +205,16 @@ InOrderCPU::CPUEvent::process()
     
 
 const char *
-InOrderCPU::CPUEvent::description()
+InOrderCPU::CPUEvent::description() const
 {
     return "InOrderCPU event";
 }
 
 void
-InOrderCPU::CPUEvent::scheduleEvent(int delay)
+InOrderCPU::CPUEvent::scheduleEvent(Cycles delay)
 {
-    if (squashed())
-        mainEventQueue.reschedule(this, cpu->nextCycle(curTick +
-                                                       cpu->ticks(delay)));
-    else if (!scheduled())
-        mainEventQueue.schedule(this, cpu->nextCycle(curTick +
-                                                     cpu->ticks(delay)));
+    assert(!scheduled() || squashed());
+    cpu->reschedule(this, cpu->clockEdge(delay), true);
 }
 
 void
@@ -178,98 +230,91 @@ InOrderCPU::InOrderCPU(Params *params)
       coreType("default"),
       _status(Idle),
       tickEvent(this),
+      stageWidth(params->stageWidth),
+      resPool(new ResourcePool(this, params)),
+      isa(numThreads, NULL),
       timeBuffer(2 , 2),
+      dataPort(resPool->getDataUnit(), ".dcache_port"),
+      instPort(resPool->getInstUnit(), ".icache_port"),
       removeInstsThisCycle(false),
       activityRec(params->name, NumStages, 10, params->activity),
-#if FULL_SYSTEM
       system(params->system),
-      physmem(system->physmem),
-#endif // FULL_SYSTEM
 #ifdef DEBUG
       cpuEventNum(0),
       resReqCount(0),
 #endif // DEBUG
-      switchCount(0),
-      deferRegistration(false/*params->deferRegistration*/),
+      drainCount(0),
       stageTracing(params->stageTracing),
+      lastRunningCycle(0),
       instsPerSwitch(0)
 {    
-    ThreadID active_threads;
     cpu_params = params;
 
-    resPool = new ResourcePool(this, params);
-
     // Resize for Multithreading CPUs
     thread.resize(numThreads);
 
-#if FULL_SYSTEM
-    active_threads = 1;
-#else
-    active_threads = params->workload.size();
-
-    if (active_threads > MaxThreads) {
-        panic("Workload Size too large. Increase the 'MaxThreads'"
-              "in your InOrder implementation or "
-              "edit your workload size.");
-    }
+    ThreadID active_threads = params->workload.size();
+    if (FullSystem) {
+        active_threads = 1;
+    } else {
+        active_threads = params->workload.size();
 
-    
-    if (active_threads > 1) {
-        threadModel = (InOrderCPU::ThreadModel) params->threadModel;
-
-        if (threadModel == SMT) {
-            DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");            
-        } else if (threadModel == SwitchOnCacheMiss) {
-            DPRINTF(InOrderCPU, "Setting Thread Model to "
-                    "Switch On Cache Miss\n");
+        if (active_threads > MaxThreads) {
+            panic("Workload Size too large. Increase the 'MaxThreads'"
+                  "in your InOrder implementation or "
+                  "edit your workload size.");
         }
-        
-    } else {
-        threadModel = Single;
-    }
-     
-        
-    
-#endif
 
-    // Bind the fetch & data ports from the resource pool.
-    fetchPortIdx = resPool->getPortIdx(params->fetchMemPort);
-    if (fetchPortIdx == 0) {
-        fatal("Unable to find port to fetch instructions from.\n");
-    }
 
-    dataPortIdx = resPool->getPortIdx(params->dataMemPort);
-    if (dataPortIdx == 0) {
-        fatal("Unable to find port for data.\n");
+        if (active_threads > 1) {
+            threadModel = (InOrderCPU::ThreadModel) params->threadModel;
+
+            if (threadModel == SMT) {
+                DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");
+            } else if (threadModel == SwitchOnCacheMiss) {
+                DPRINTF(InOrderCPU, "Setting Thread Model to "
+                        "Switch On Cache Miss\n");
+            }
+
+        } else {
+            threadModel = Single;
+        }
     }
 
     for (ThreadID tid = 0; tid < numThreads; ++tid) {
-#if FULL_SYSTEM
-        // SMT is not supported in FS mode yet.
-        assert(numThreads == 1);
-        thread[tid] = new Thread(this, 0);
-#else
-        if (tid < (ThreadID)params->workload.size()) {
-            DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
-                    tid, params->workload[tid]->prog_fname);
-            thread[tid] =
-                new Thread(this, tid, params->workload[tid]);
+        isa[tid] = params->isa[tid];
+        pc[tid].set(0);
+        lastCommittedPC[tid].set(0);
+
+        if (FullSystem) {
+            // SMT is not supported in FS mode yet.
+            assert(numThreads == 1);
+            thread[tid] = new Thread(this, 0, NULL);
         } else {
-            //Allocate Empty thread so M5 can use later
-            //when scheduling threads to CPU
-            Process* dummy_proc = params->workload[0];
-            thread[tid] = new Thread(this, tid, dummy_proc);
+            if (tid < (ThreadID)params->workload.size()) {
+                DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
+                        tid, params->workload[tid]->progName());
+                thread[tid] =
+                    new Thread(this, tid, params->workload[tid]);
+            } else {
+                //Allocate Empty thread so M5 can use later
+                //when scheduling threads to CPU
+                Process* dummy_proc = params->workload[0];
+                thread[tid] = new Thread(this, tid, dummy_proc);
+            }
+
+            // Eventually set this with parameters...
+            asid[tid] = tid;
         }
-        
-        // Eventually set this with parameters...
-        asid[tid] = tid;
-#endif
 
         // Setup the TC that will serve as the interface to the threads/CPU.
         InOrderThreadContext *tc = new InOrderThreadContext;
         tc->cpu = this;
         tc->thread = thread[tid];
 
+        // Setup quiesce event.
+        this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc);
+
         // Give the thread the TC.
         thread[tid]->tc = tc;
         thread[tid]->setFuncExeInst(0);
@@ -316,9 +361,10 @@ InOrderCPU::InOrderCPU(Params *params)
 
         memset(intRegs[tid], 0, sizeof(intRegs[tid]));
         memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid]));
-        isa[tid].clear();
-
-        isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/);
+#ifdef ISA_HAS_CC_REGS
+        memset(ccRegs[tid], 0, sizeof(ccRegs[tid]));
+#endif
+        isa[tid]->clear();
 
         // Define dummy instructions and resource requests to be used.
         dummyInst[tid] = new InOrderDynInst(this, 
@@ -327,41 +373,225 @@ InOrderCPU::InOrderCPU(Params *params)
                                             tid, 
                                             asid[tid]);
 
-        dummyReq[tid] = new ResourceRequest(resPool->getResource(0), 
-                                            dummyInst[tid], 
-                                            0, 
-                                            0, 
-                                            0, 
-                                            0);        
+        dummyReq[tid] = new ResourceRequest(resPool->getResource(0));
+
+
+        if (FullSystem) {
+            // Use this dummy inst to force squashing behind every instruction
+            // in pipeline
+            dummyTrapInst[tid] = new InOrderDynInst(this, NULL, 0, 0, 0);
+            dummyTrapInst[tid]->seqNum = 0;
+            dummyTrapInst[tid]->squashSeqNum = 0;
+            dummyTrapInst[tid]->setTid(tid);
+        }
+
+        trapPending[tid] = false;
+
+    }
+
+    // InOrderCPU always requires an interrupt controller.
+    if (!params->switched_out && !interrupts) {
+        fatal("InOrderCPU %s has no interrupt controller.\n"
+              "Ensure createInterruptController() is called.\n", name());
     }
 
     dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0);
     dummyReqInst->setSquashed();
+    dummyReqInst->resetInstCount();
 
     dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0);
     dummyBufferInst->setSquashed();
-    
-    lastRunningCycle = curTick;
+    dummyBufferInst->resetInstCount();
 
-    // Reset CPU to reset state.
-#if FULL_SYSTEM
-    Fault resetFault = new ResetFault();
-    resetFault->invoke(tcBase());
-#else
-    reset();
-#endif
+    endOfSkedIt = skedCache.end();
+    frontEndSked = createFrontEndSked();
+    faultSked = createFaultSked();
 
-    dummyBufferInst->resetInstCount();
+    lastRunningCycle = curCycle();
+
+    lockAddr = 0;
+    lockFlag = false;
     
     // Schedule First Tick Event, CPU will reschedule itself from here on out.
-    scheduleTickEvent(0);
+    scheduleTickEvent(Cycles(0));
 }
 
 InOrderCPU::~InOrderCPU()
 {
     delete resPool;
+
+    SkedCacheIt sked_it = skedCache.begin();
+    SkedCacheIt sked_end = skedCache.end();
+
+    while (sked_it != sked_end) {
+        delete (*sked_it).second;
+        sked_it++;
+    }
+    skedCache.clear();
 }
 
+// Definition of the static schedule cache: maps a SkedID to the resource
+// schedule pointer stored for it.
+m5::hash_map<InOrderCPU::SkedID, ThePipeline::RSkedPtr> InOrderCPU::skedCache;
+
+/**
+ * Build the resource schedule for the instruction front end.
+ *
+ * Stage 0 requests the fetch resources and stage 1 requests the resources
+ * that complete the fetch, decode the instruction, predict branches and
+ * update the target PC.
+ *
+ * @return Newly allocated front-end resource schedule.
+ */
+RSkedPtr
+InOrderCPU::createFrontEndSked()
+{
+    const int fetch_stage_num = 0;
+    const int decode_stage_num = 1;
+
+    RSkedPtr front_sked = new ResourceSked();
+    StageScheduler fetch_stage(front_sked, fetch_stage_num);
+    StageScheduler decode_stage(front_sked, decode_stage_num);
+
+    // FETCH
+    fetch_stage.needs(FetchSeq, FetchSeqUnit::AssignNextPC);
+    fetch_stage.needs(ICache, FetchUnit::InitiateFetch);
+
+    // DECODE
+    decode_stage.needs(ICache, FetchUnit::CompleteFetch);
+    decode_stage.needs(Decode, DecodeUnit::DecodeInst);
+    decode_stage.needs(BPred, BranchPredictor::PredictBranch);
+    decode_stage.needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
+
+    DPRINTF(SkedCache, "Resource Sked created for instruction Front End\n");
+
+    return front_sked;
+}
+
+/**
+ * Build the resource schedule used for faults: a single fault check by
+ * the graduation unit in the last pipeline stage.
+ *
+ * @return Newly allocated fault resource schedule.
+ */
+RSkedPtr
+InOrderCPU::createFaultSked()
+{
+    RSkedPtr fault_sked = new ResourceSked();
+
+    // Only the final stage takes part in this schedule.
+    StageScheduler last_stage(fault_sked, NumStages - 1);
+    last_stage.needs(Grad, GraduationUnit::CheckFault);
+
+    DPRINTF(SkedCache, "Resource Sked created for instruction Faults\n");
+
+    return fault_sked;
+}
+
+RSkedPtr
+InOrderCPU::createBackEndSked(DynInstPtr inst)
+{
+    RSkedPtr res_sked = lookupSked(inst);
+    if (res_sked != NULL) {
+        DPRINTF(SkedCache, "Found %s in sked cache.\n",
+                inst->instName());
+        return res_sked;
+    } else {
+        res_sked = new ResourceSked();
+    }
+
+    int stage_num = ThePipeline::BackEndStartStage;
+    StageScheduler X(res_sked, stage_num++);
+    StageScheduler M(res_sked, stage_num++);
+    StageScheduler W(res_sked, stage_num++);
+
+    if (!inst->staticInst) {
+        warn_once("Static Instruction Object Not Set. Can't Create"
+                  " Back End Schedule");
+        return NULL;
+    }
+
+    // EXECUTE
+    X.needs(RegManager, UseDefUnit::MarkDestRegs);
+    for (int idx=0; idx < inst->numSrcRegs(); idx++) {
+        if (!idx || !inst->isStore()) {
+            X.needs(RegManager, UseDefUnit::ReadSrcReg, idx);
+        }
+    }
+
+    //@todo: schedule non-spec insts to operate on this cycle
+    // as long as all previous insts are done
+    if ( inst->isNonSpeculative() ) {
+        // skip execution of non speculative insts until later
+    } else if ( inst->isMemRef() ) {
+        if ( inst->isLoad() ) {
+            X.needs(AGEN, AGENUnit::GenerateAddr);
+        }
+    } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
+        X.needs(MDU, MultDivUnit::StartMultDiv);
+    } else {
+        X.needs(ExecUnit, ExecutionUnit::ExecuteInst);
+    }
+
+    // MEMORY
+    if (!inst->isNonSpeculative()) {
+        if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
+            M.needs(MDU, MultDivUnit::EndMultDiv);
+        }
+
+        if ( inst->isLoad() ) {
+            M.needs(DCache, CacheUnit::InitiateReadData);
+            if (inst->splitInst)
+                M.needs(DCache, CacheUnit::InitSecondSplitRead);
+        } else if ( inst->isStore() ) {
+            for (int i = 1; i < inst->numSrcRegs(); i++ ) {
+                M.needs(RegManager, UseDefUnit::ReadSrcReg, i);
+            }
+            M.needs(AGEN, AGENUnit::GenerateAddr);
+            M.needs(DCache, CacheUnit::InitiateWriteData);
+            if (inst->splitInst)
+                M.needs(DCache, CacheUnit::InitSecondSplitWrite);
+        }
+    }
+
+    // WRITEBACK
+    if (!inst->isNonSpeculative()) {
+        if ( inst->isLoad() ) {
+            W.needs(DCache, CacheUnit::CompleteReadData);
+            if (inst->splitInst)
+                W.needs(DCache, CacheUnit::CompleteSecondSplitRead);
+        } else if ( inst->isStore() ) {
+            W.needs(DCache, CacheUnit::CompleteWriteData);
+            if (inst->splitInst)
+                W.needs(DCache, CacheUnit::CompleteSecondSplitWrite);
+        }
+    } else {
+        // Finally, Execute Speculative Data
+        if (inst->isMemRef()) {
+            if (inst->isLoad()) {
+                W.needs(AGEN, AGENUnit::GenerateAddr);
+                W.needs(DCache, CacheUnit::InitiateReadData);
+                if (inst->splitInst)
+                    W.needs(DCache, CacheUnit::InitSecondSplitRead);
+                W.needs(DCache, CacheUnit::CompleteReadData);
+                if (inst->splitInst)
+                    W.needs(DCache, CacheUnit::CompleteSecondSplitRead);
+            } else if (inst->isStore()) {
+                if ( inst->numSrcRegs() >= 2 ) {
+                    W.needs(RegManager, UseDefUnit::ReadSrcReg, 1);
+                }
+                W.needs(AGEN, AGENUnit::GenerateAddr);
+                W.needs(DCache, CacheUnit::InitiateWriteData);
+                if (inst->splitInst)
+                    W.needs(DCache, CacheUnit::InitSecondSplitWrite);
+                W.needs(DCache, CacheUnit::CompleteWriteData);
+                if (inst->splitInst)
+                    W.needs(DCache, CacheUnit::CompleteSecondSplitWrite);
+            }
+        } else {
+            W.needs(ExecUnit, ExecutionUnit::ExecuteInst);
+        }
+    }
+
+    W.needs(Grad, GraduationUnit::CheckFault);
+
+    for (int idx=0; idx < inst->numDestRegs(); idx++) {
+        W.needs(RegManager, UseDefUnit::WriteDestReg, idx);
+    }
+
+    if (inst->isControl())
+        W.needs(BPred, BranchPredictor::UpdatePredictor);
+
+    W.needs(Grad, GraduationUnit::GraduateInst);
+
+    // Insert Back Schedule into our cache of
+    // resource schedules
+    addToSkedCache(inst, res_sked);
+
+    DPRINTF(SkedCache, "Back End Sked Created for instruction: %s (%08p)\n",
+            inst->instName(), inst->getMachInst());
+    res_sked->print();
+
+    return res_sked;
+}
 
 void
 InOrderCPU::regStats()
@@ -369,13 +599,6 @@ InOrderCPU::regStats()
     /* Register the Resource Pool's stats here.*/
     resPool->regStats();
 
-#ifdef DEBUG
-    maxResReqCount
-        .name(name() + ".maxResReqCount")
-        .desc("Maximum number of live resource requests in CPU")
-        .prereq(maxResReqCount);   
-#endif
-
     /* Register for each Pipeline Stage */
     for (int stage_num=0; stage_num < ThePipeline::NumStages; stage_num++) {
         pipelineStage[stage_num]->regStats();
@@ -451,16 +674,21 @@ InOrderCPU::regStats()
     committedInsts
         .init(numThreads)
         .name(name() + ".committedInsts")
-        .desc("Number of Instructions Simulated (Per-Thread)");
+        .desc("Number of Instructions committed (Per-Thread)");
+
+    committedOps
+        .init(numThreads)
+        .name(name() + ".committedOps")
+        .desc("Number of Ops committed (Per-Thread)");
 
     smtCommittedInsts
         .init(numThreads)
         .name(name() + ".smtCommittedInsts")
-        .desc("Number of SMT Instructions Simulated (Per-Thread)");
+        .desc("Number of SMT Instructions committed (Per-Thread)");
 
     totalCommittedInsts
         .name(name() + ".committedInsts_total")
-        .desc("Number of Instructions Simulated (Total)");
+        .desc("Number of Instructions committed (Total)");
 
     cpi
         .name(name() + ".cpi")
@@ -509,8 +737,9 @@ InOrderCPU::tick()
 
     ++numCycles;
 
+    checkForInterrupts();
+
     bool pipes_idle = true;
-    
     //Tick each of the stages
     for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
         pipelineStage[stNum]->tick();
@@ -530,8 +759,7 @@ InOrderCPU::tick()
     }
     activityRec.advance();
    
-    // Any squashed requests, events, or insts then remove them now
-    cleanUpRemovedReqs();
+    // Any squashed events, or insts then remove them now
     cleanUpRemovedEvents();
     cleanUpRemovedInsts();
 
@@ -539,17 +767,17 @@ InOrderCPU::tick()
     if (!tickEvent.scheduled()) {
         if (_status == SwitchedOut) {
             // increment stat
-            lastRunningCycle = curTick;
+            lastRunningCycle = curCycle();
         } else if (!activityRec.active()) {
             DPRINTF(InOrderCPU, "sleeping CPU.\n");
-            lastRunningCycle = curTick;
+            lastRunningCycle = curCycle();
             timesIdled++;
         } else {
-            //Tick next_tick = curTick + cycles(1);
+            //Tick next_tick = curTick() + cycles(1);
             //tickEvent.schedule(next_tick);
-            mainEventQueue.schedule(&tickEvent, nextCycle(curTick + 1));
+            schedule(&tickEvent, clockEdge(Cycles(1)));
             DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", 
-                    nextCycle(curTick + 1));
+                    clockEdge(Cycles(1)));
         }
     }
 
@@ -561,50 +789,50 @@ InOrderCPU::tick()
 void
 InOrderCPU::init()
 {
-    if (!deferRegistration) {
-        registerThreadContexts();
-    }
+    BaseCPU::init();
 
-    // Set inSyscall so that the CPU doesn't squash when initially
-    // setting up registers.
-    for (ThreadID tid = 0; tid < numThreads; ++tid)
-        thread[tid]->inSyscall = true;
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        // Set noSquashFromTC so that the CPU doesn't squash when initially
+        // setting up registers.
+        thread[tid]->noSquashFromTC = true;
+        // Initialise the ThreadContext's memory proxies
+        thread[tid]->initMemProxies(thread[tid]->getTC());
+    }
 
-#if FULL_SYSTEM
-    for (ThreadID tid = 0; tid < numThreads; tid++) {
-        ThreadContext *src_tc = threadContexts[tid];
-        TheISA::initCPU(src_tc, src_tc->contextId());
+    if (FullSystem && !params()->switched_out) {
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            ThreadContext *src_tc = threadContexts[tid];
+            TheISA::initCPU(src_tc, src_tc->contextId());
+        }
     }
-#endif
 
-    // Clear inSyscall.
+    // Clear noSquashFromTC.
     for (ThreadID tid = 0; tid < numThreads; ++tid)
-        thread[tid]->inSyscall = false;
+        thread[tid]->noSquashFromTC = false;
 
     // Call Initializiation Routine for Resource Pool
     resPool->init();
 }
 
 void
-InOrderCPU::reset()
+InOrderCPU::verifyMemoryMode() const
 {
-    for (int i = 0; i < numThreads; i++) {
-        isa[i].reset(coreType, numThreads,
-                     1/*numVirtProcs*/, dynamic_cast<BaseCPU*>(this));
+    if (!system->isTimingMode()) {
+        fatal("The in-order CPU requires the memory system to be in "
+              "'timing' mode.\n");
     }
 }
 
-Port*
-InOrderCPU::getPort(const std::string &if_name, int idx)
-{
-    return resPool->getPort(if_name, idx);
-}
-
-#if FULL_SYSTEM
 Fault
 InOrderCPU::hwrei(ThreadID tid)
 {
-    panic("hwrei: Unimplemented");
+#if THE_ISA == ALPHA_ISA
+    // Need to clear the lock flag upon returning from an interrupt.
+    setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG, false, tid);
+
+    thread[tid]->kernelStats->hwrei();
+    // FIXME: XXX check for interrupts? XXX
+#endif
     
     return NoFault;
 }
@@ -613,20 +841,73 @@ InOrderCPU::hwrei(ThreadID tid)
 bool
 InOrderCPU::simPalCheck(int palFunc, ThreadID tid)
 {
-    panic("simPalCheck: Unimplemented");
+#if THE_ISA == ALPHA_ISA
+    if (this->thread[tid]->kernelStats)
+        this->thread[tid]->kernelStats->callpal(palFunc,
+                                                this->threadContexts[tid]);
+
+    switch (palFunc) {
+      case PAL::halt:
+        halt();
+        if (--System::numSystemsRunning == 0)
+            exitSimLoop("all cpus halted");
+        break;
 
+      case PAL::bpt:
+      case PAL::bugchk:
+        if (this->system->breakpoint())
+            return false;
+        break;
+    }
+#endif
     return true;
 }
 
+void
+InOrderCPU::checkForInterrupts()
+{
+    for (int i = 0; i < threadContexts.size(); i++) {
+        ThreadContext *tc = threadContexts[i];
+
+        if (interrupts->checkInterrupts(tc)) {
+            Fault interrupt = interrupts->getInterrupt(tc);
+
+            if (interrupt != NoFault) {
+                DPRINTF(Interrupt, "Processing Intterupt for [tid:%i].\n",
+                        tc->threadId());
+
+                ThreadID tid = tc->threadId();
+                interrupts->updateIntrInfo(tc);
+
+                // Squash from Last Stage in Pipeline
+                unsigned last_stage = NumStages - 1;
+                dummyTrapInst[tid]->squashingStage = last_stage;
+                pipelineStage[last_stage]->setupSquash(dummyTrapInst[tid],
+                                                       tid);
+
+                // By default, setupSquash will always squash from stage + 1
+                pipelineStage[BackEndStartStage - 1]->setupSquash(dummyTrapInst[tid],
+                                                                  tid);
+
+                // Schedule Squash Through-out Resource Pool
+                resPool->scheduleEvent(
+                    (InOrderCPU::CPUEventType)ResourcePool::SquashAll,
+                    dummyTrapInst[tid], Cycles(0));
+
+                // Finally, Setup Trap to happen at end of cycle
+                trapContext(interrupt, tid, dummyTrapInst[tid]);
+            }
+        }
+    }
+}
 
 Fault
 InOrderCPU::getInterrupts()
 {
     // Check if there are any outstanding interrupts
-    return this->interrupts->getInterrupt(this->threadContexts[0]);
+    return interrupts->getInterrupt(threadContexts[0]);
 }
 
-
 void
 InOrderCPU::processInterrupts(Fault interrupt)
 {
@@ -637,39 +918,32 @@ InOrderCPU::processInterrupts(Fault interrupt)
     // @todo: Allow other threads to handle interrupts.
 
     assert(interrupt != NoFault);
-    this->interrupts->updateIntrInfo(this->threadContexts[0]);
+    interrupts->updateIntrInfo(threadContexts[0]);
 
     DPRINTF(InOrderCPU, "Interrupt %s being handled\n", interrupt->name());
-    this->trap(interrupt, 0);
-}
 
-
-void
-InOrderCPU::updateMemPorts()
-{
-    // Update all ThreadContext's memory ports (Functional/Virtual
-    // Ports)
-    ThreadID size = thread.size();
-    for (ThreadID i = 0; i < size; ++i)
-        thread[i]->connectMemPorts(thread[i]->getTC());
+    // Note: Context ID ok here? Impl. of FS mode needs to revisit this
+    trap(interrupt, threadContexts[0]->contextId(), dummyBufferInst);
 }
-#endif
 
 void
-InOrderCPU::trap(Fault fault, ThreadID tid, int delay)
+InOrderCPU::trapContext(Fault fault, ThreadID tid, DynInstPtr inst,
+                        Cycles delay)
 {
-    //@ Squash Pipeline during TRAP
-    scheduleCpuEvent(Trap, fault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(Trap, fault, tid, inst, delay);
+    trapPending[tid] = true;
 }
 
 void
-InOrderCPU::trapCPU(Fault fault, ThreadID tid)
+InOrderCPU::trap(Fault fault, ThreadID tid, DynInstPtr inst)
 {
-    fault->invoke(tcBase(tid));
+    fault->invoke(tcBase(tid), inst->staticInst);
+    removePipelineStalls(tid);
 }
 
 void 
-InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay)
+InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid,
+                               Cycles delay)
 {
     scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay);
 }
@@ -697,25 +971,21 @@ InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num,
 void
 InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
                              ThreadID tid, DynInstPtr inst, 
-                             unsigned delay, unsigned event_pri_offset)
+                             Cycles delay, CPUEventPri event_pri)
 {
     CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst,
-                                       event_pri_offset);
+                                       event_pri);
 
-    Tick sked_tick = nextCycle(curTick + ticks(delay));
-    if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
-                eventNames[c_event], curTick + delay, tid);
-        mainEventQueue.schedule(cpu_event, sked_tick);
-    } else {
-        cpu_event->process();
-        cpuEventRemoveList.push(cpu_event);
-    }
+    Tick sked_tick = clockEdge(delay);
+    DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
+            eventNames[c_event], curTick() + delay, tid);
+    schedule(cpu_event, sked_tick);
 
     // Broadcast event to the Resource Pool
     // Need to reset tid just in case this is a dummy instruction
     inst->setTid(tid);        
-    resPool->scheduleEvent(c_event, inst, 0, 0, tid);
+    // @todo: Is this really right? Should the delay not be passed on?
+    resPool->scheduleEvent(c_event, inst, Cycles(0), 0, tid);
 }
 
 bool
@@ -755,7 +1025,7 @@ InOrderCPU::activateNextReadyThread()
         activateThread(ready_tid);                        
         
         // Activate in Resource Pool
-        resPool->activateAll(ready_tid);
+        resPool->activateThread(ready_tid);
         
         list<ThreadID>::iterator ready_it =
             std::find(readyThreads.begin(), readyThreads.end(), ready_tid);
@@ -800,7 +1070,7 @@ InOrderCPU::activateThread(ThreadID tid)
         
         activateThreadInPipeline(tid);
 
-        thread[tid]->lastActivate = curTick;            
+        thread[tid]->lastActivate = curTick();            
 
         tcBase(tid)->setStatus(ThreadContext::Active);    
 
@@ -819,7 +1089,7 @@ InOrderCPU::activateThreadInPipeline(ThreadID tid)
 }
 
 void
-InOrderCPU::deactivateContext(ThreadID tid, int delay)
+InOrderCPU::deactivateContext(ThreadID tid, Cycles delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid);
 
@@ -875,7 +1145,6 @@ InOrderCPU::updateThreadPriority()
         //DEFAULT TO ROUND ROBIN SCHEME
         //e.g. Move highest priority to end of thread list
         list<ThreadID>::iterator list_begin = activeThreads.begin();
-        list<ThreadID>::iterator list_end   = activeThreads.end();
 
         unsigned high_thread = *list_begin;
 
@@ -902,7 +1171,7 @@ InOrderCPU::tickThreadStats()
 }
 
 void
-InOrderCPU::activateContext(ThreadID tid, int delay)
+InOrderCPU::activateContext(ThreadID tid, Cycles delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid);
 
@@ -917,15 +1186,12 @@ InOrderCPU::activateContext(ThreadID tid, int delay)
 }
 
 void
-InOrderCPU::activateNextReadyContext(int delay)
+InOrderCPU::activateNextReadyContext(Cycles delay)
 {
     DPRINTF(InOrderCPU,"Activating next ready thread\n");
 
-    // NOTE: Add 5 to the event priority so that we always activate
-    // threads after we've finished deactivating, squashing,etc.
-    // other threads
     scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], 
-                     delay, 5);
+                     delay, ActivateNextReadyThread_Pri);
 
     // Be sure to signal that there's some activity so the CPU doesn't
     // deschedule itself.
@@ -935,11 +1201,11 @@ InOrderCPU::activateNextReadyContext(int delay)
 }
 
 void
-InOrderCPU::haltContext(ThreadID tid, int delay)
+InOrderCPU::haltContext(ThreadID tid)
 {
     DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid);
 
-    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid]);
 
     activityRec.activity();
 }
@@ -960,9 +1226,9 @@ InOrderCPU::haltThread(ThreadID tid)
 }
 
 void
-InOrderCPU::suspendContext(ThreadID tid, int delay)
+InOrderCPU::suspendContext(ThreadID tid)
 {
-    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid]);
 }
 
 void
@@ -972,7 +1238,7 @@ InOrderCPU::suspendThread(ThreadID tid)
             tid);
     deactivateThread(tid);
     suspendedThreads.push_back(tid);    
-    thread[tid]->lastSuspend = curTick;    
+    thread[tid]->lastSuspend = curTick();    
 
     tcBase(tid)->setStatus(ThreadContext::Suspended);    
 }
@@ -992,83 +1258,118 @@ InOrderCPU::getPipeStage(int stage_num)
     return pipelineStage[stage_num];
 }
 
-uint64_t
-InOrderCPU::readPC(ThreadID tid)
-{
-    return PC[tid];
-}
-
 
-void
-InOrderCPU::setPC(Addr new_PC, ThreadID tid)
+RegIndex
+InOrderCPU::flattenRegIdx(RegIndex reg_idx, RegClass &reg_type, ThreadID tid)
 {
-    PC[tid] = new_PC;
-}
+    RegIndex rel_idx;
 
+    reg_type = regIdxToClass(reg_idx, &rel_idx);
 
-uint64_t
-InOrderCPU::readNextPC(ThreadID tid)
-{
-    return nextPC[tid];
-}
+    switch (reg_type) {
+      case IntRegClass:
+        return isa[tid]->flattenIntIndex(rel_idx);
 
+      case FloatRegClass:
+        return isa[tid]->flattenFloatIndex(rel_idx);
 
-void
-InOrderCPU::setNextPC(uint64_t new_NPC, ThreadID tid)
-{
-    nextPC[tid] = new_NPC;
-}
-
+      case MiscRegClass:
+        return rel_idx;
 
-uint64_t
-InOrderCPU::readNextNPC(ThreadID tid)
-{
-    return nextNPC[tid];
-}
-
-
-void
-InOrderCPU::setNextNPC(uint64_t new_NNPC, ThreadID tid)
-{
-    nextNPC[tid] = new_NNPC;
+      default:
+        panic("register %d out of range\n", reg_idx);
+    }
 }
 
 uint64_t
-InOrderCPU::readIntReg(int reg_idx, ThreadID tid)
+InOrderCPU::readIntReg(RegIndex reg_idx, ThreadID tid)
 {
+    DPRINTF(IntRegs, "[tid:%i]: Reading Int. Reg %i as %x\n",
+            tid, reg_idx, intRegs[tid][reg_idx]);
+
     return intRegs[tid][reg_idx];
 }
 
 FloatReg
-InOrderCPU::readFloatReg(int reg_idx, ThreadID tid)
+InOrderCPU::readFloatReg(RegIndex reg_idx, ThreadID tid)
 {
+    DPRINTF(FloatRegs, "[tid:%i]: Reading Float Reg %i as %x, %08f\n",
+            tid, reg_idx, floatRegs.i[tid][reg_idx], floatRegs.f[tid][reg_idx]);
+
     return floatRegs.f[tid][reg_idx];
 }
 
 FloatRegBits
-InOrderCPU::readFloatRegBits(int reg_idx, ThreadID tid)
-{;
+InOrderCPU::readFloatRegBits(RegIndex reg_idx, ThreadID tid)
+{
+    DPRINTF(FloatRegs, "[tid:%i]: Reading Float Reg %i as %x, %08f\n",
+            tid, reg_idx, floatRegs.i[tid][reg_idx], floatRegs.f[tid][reg_idx]);
+
     return floatRegs.i[tid][reg_idx];
 }
 
+CCReg
+InOrderCPU::readCCReg(RegIndex reg_idx, ThreadID tid)
+{
+#ifdef ISA_HAS_CC_REGS
+    DPRINTF(CCRegs, "[tid:%i]: Reading CC. Reg %i as %x\n",
+            tid, reg_idx, ccRegs[tid][reg_idx]);
+
+    return ccRegs[tid][reg_idx];
+#else
+    panic("readCCReg: ISA does not have CC regs\n");
+#endif
+}
+
 void
-InOrderCPU::setIntReg(int reg_idx, uint64_t val, ThreadID tid)
+InOrderCPU::setIntReg(RegIndex reg_idx, uint64_t val, ThreadID tid)
 {
-    intRegs[tid][reg_idx] = val;
+    if (reg_idx == TheISA::ZeroReg) {
+        DPRINTF(IntRegs, "[tid:%i]: Ignoring Setting of ISA-ZeroReg "
+                "(Int. Reg %i) to %x\n", tid, reg_idx, val);
+        return;
+    } else {
+        DPRINTF(IntRegs, "[tid:%i]: Setting Int. Reg %i to %x\n",
+                tid, reg_idx, val);
+
+        intRegs[tid][reg_idx] = val;
+    }
 }
 
 
 void
-InOrderCPU::setFloatReg(int reg_idx, FloatReg val, ThreadID tid)
+InOrderCPU::setFloatReg(RegIndex reg_idx, FloatReg val, ThreadID tid)
 {
     floatRegs.f[tid][reg_idx] = val;
+    DPRINTF(FloatRegs, "[tid:%i]: Setting Float. Reg %i bits to "
+            "%x, %08f\n",
+            tid, reg_idx,
+            floatRegs.i[tid][reg_idx],
+            floatRegs.f[tid][reg_idx]);
 }
 
 
 void
-InOrderCPU::setFloatRegBits(int reg_idx, FloatRegBits val, ThreadID tid)
+InOrderCPU::setFloatRegBits(RegIndex reg_idx, FloatRegBits val, ThreadID tid)
 {
     floatRegs.i[tid][reg_idx] = val;
+    DPRINTF(FloatRegs, "[tid:%i]: Setting Float. Reg %i bits to "
+            "%x, %08f\n",
+            tid, reg_idx,
+            floatRegs.i[tid][reg_idx],
+            floatRegs.f[tid][reg_idx]);
+}
+
+void
+InOrderCPU::setCCReg(RegIndex reg_idx, CCReg val, ThreadID tid)
+{
+#ifdef ISA_HAS_CC_REGS
+    DPRINTF(CCRegs, "[tid:%i]: Setting CC. Reg %i to %x\n",
+            tid, reg_idx, val);
+    ccRegs[tid][reg_idx] = val;  // store into thread tid's CC register file
+#else  // built without ISA_HAS_CC_REGS: any call is a fatal error
+    panic("setCCReg: ISA does not have CC regs\n");  // name the setter, not readCCReg
+#endif  // ISA_HAS_CC_REGS
 }
 
 uint64_t
@@ -1079,18 +1380,25 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid)
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {                   
+    RegIndex rel_idx;
+
+    switch (regIdxToClass(reg_idx, &rel_idx)) {
+      case IntRegClass:
         // Integer Register File
-        return readIntReg(reg_idx, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {          
+        return readIntReg(rel_idx, tid);
+
+      case FloatRegClass:
         // Float Register File
-        reg_idx -= FP_Base_DepTag;
-        return readFloatRegBits(reg_idx, tid);
-    } else {
-        reg_idx -= Ctrl_Base_DepTag;
-        return readMiscReg(reg_idx, tid);  // Misc. Register File
+        return readFloatRegBits(rel_idx, tid);
+
+      case MiscRegClass:
+        return readMiscReg(rel_idx, tid);  // Misc. Register File
+
+      default:
+        panic("register %d out of range\n", reg_idx);
     }
 }
+
 void
 InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
                               ThreadID tid)
@@ -1100,44 +1408,54 @@ InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {            // Integer Register File
-        setIntReg(reg_idx, val, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {   // Float Register File
-        reg_idx -= FP_Base_DepTag;
-        setFloatRegBits(reg_idx, val, tid);
-    } else {
-        reg_idx -= Ctrl_Base_DepTag;
-        setMiscReg(reg_idx, val, tid); // Misc. Register File
+    RegIndex rel_idx;
+
+    switch (regIdxToClass(reg_idx, &rel_idx)) {
+      case IntRegClass:
+        setIntReg(rel_idx, val, tid);
+        break;
+
+      case FloatRegClass:
+        setFloatRegBits(rel_idx, val, tid);
+        break;
+
+      case CCRegClass:
+        setCCReg(rel_idx, val, tid);
+        break;
+
+      case MiscRegClass:
+        setMiscReg(rel_idx, val, tid); // Misc. Register File
+        break;
     }
 }
 
 MiscReg
 InOrderCPU::readMiscRegNoEffect(int misc_reg, ThreadID tid)
 {
-    return isa[tid].readMiscRegNoEffect(misc_reg);
+    return isa[tid]->readMiscRegNoEffect(misc_reg);
 }
 
 MiscReg
 InOrderCPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    return isa[tid].readMiscReg(misc_reg, tcBase(tid));
+    return isa[tid]->readMiscReg(misc_reg, tcBase(tid));
 }
 
 void
 InOrderCPU::setMiscRegNoEffect(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    isa[tid].setMiscRegNoEffect(misc_reg, val);
+    isa[tid]->setMiscRegNoEffect(misc_reg, val);
 }
 
 void
 InOrderCPU::setMiscReg(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    isa[tid].setMiscReg(misc_reg, val, tcBase(tid));
+    isa[tid]->setMiscReg(misc_reg, val, tcBase(tid));
 }
 
 
 InOrderCPU::ListIt
-InOrderCPU::addInst(DynInstPtr &inst)
+InOrderCPU::addInst(DynInstPtr inst)
 {
     ThreadID tid = inst->readTid();
 
@@ -1146,6 +1464,24 @@ InOrderCPU::addInst(DynInstPtr &inst)
     return --(instList[tid].end());
 }
 
+InOrderCPU::ListIt
+InOrderCPU::findInst(InstSeqNum seq_num, ThreadID tid)
+{
+    ListIt it = instList[tid].begin();
+    ListIt end = instList[tid].end();
+
+    while (it != end) {
+        if ((*it)->seqNum == seq_num)
+            return it;
+        else if ((*it)->seqNum > seq_num)
+            break;
+
+        it++;
+    }
+
+    return instList[tid].end();
+}
+
 void 
 InOrderCPU::updateContextSwitchStats()
 {
@@ -1158,18 +1494,22 @@ InOrderCPU::updateContextSwitchStats()
 void
 InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
 {
-    // Set the CPU's PCs - This contributes to the precise state of the CPU 
+    // Set the nextPC to be fetched if this is the last instruction
+    // committed
+    // ========
+    // This contributes to the precise state of the CPU
     // which can be used when restoring a thread to the CPU after any
     // type of context switching activity (fork, exception, etc.)
-    setPC(inst->readPC(), tid);
-    setNextPC(inst->readNextPC(), tid);
-    setNextNPC(inst->readNextNPC(), tid);
+    TheISA::PCState comm_pc = inst->pcState();
+    lastCommittedPC[tid] = comm_pc;
+    TheISA::advancePC(comm_pc, inst->staticInst);
+    pcState(comm_pc, tid);
 
+    //@todo: may be unnecessary with new-ISA-specific branch handling code
     if (inst->isControl()) {
         thread[tid]->lastGradIsBranch = true;
-        thread[tid]->lastBranchPC = inst->readPC();
-        thread[tid]->lastBranchNextPC = inst->readNextPC();
-        thread[tid]->lastBranchNextNPC = inst->readNextNPC();        
+        thread[tid]->lastBranchPC = inst->pcState();
+        TheISA::advancePC(thread[tid]->lastBranchPC, inst->staticInst);
     } else {
         thread[tid]->lastGradIsBranch = false;
     }
@@ -1177,7 +1517,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
 
     // Finalize Trace Data For Instruction
     if (inst->traceData) {
-        //inst->traceData->setCycle(curTick);
+        //inst->traceData->setCycle(curTick());
         inst->traceData->setFetchSeq(inst->seqNum);
         //inst->traceData->setCPSeq(cpu->tcBase(tid)->numInst);
         inst->traceData->dump();
@@ -1190,19 +1530,26 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
     
     // Increment thread-state's instruction count
     thread[tid]->numInst++;
+    thread[tid]->numOp++;
 
     // Increment thread-state's instruction stats
     thread[tid]->numInsts++;
+    thread[tid]->numOps++;
 
     // Count committed insts per thread stats
-    committedInsts[tid]++;
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        committedInsts[tid]++;
 
-    // Count total insts committed stat
-    totalCommittedInsts++;
+        // Count total insts committed stat
+        totalCommittedInsts++;
+    }
+
+    committedOps[tid]++;
 
     // Count SMT-committed insts per thread stat
     if (numActiveThreads() > 1) {
-        smtCommittedInsts[tid]++;
+        if (!inst->isMicroop() || inst->isLastMicroop())
+            smtCommittedInsts[tid]++;
     }
 
     // Instruction-Mix Stats
@@ -1223,12 +1570,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
     }
 
     // Check for instruction-count-based events.
-    comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
-
-    // Broadcast to other resources an instruction
-    // has been completed
-    resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, 
-                           0, 0, tid);
+    comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp);
 
     // Finally, remove instruction from CPU
     removeInst(inst);
@@ -1237,43 +1579,43 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
 // currently unused function, but substitute repetitive code w/this function
 // call
 void
-InOrderCPU::addToRemoveList(DynInstPtr &inst)
+InOrderCPU::addToRemoveList(DynInstPtr inst)
 {
     removeInstsThisCycle = true;
     if (!inst->isRemoveList()) {            
-        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %s "
                 "[sn:%lli] to remove list\n",
-                inst->threadNumber, inst->readPC(), inst->seqNum);
+                inst->threadNumber, inst->pcState(), inst->seqNum);
         inst->setRemoveList();        
         removeList.push(inst->getInstListIt());
     }  else {
-        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %s "
                 "[sn:%lli], already remove list\n",
-                inst->threadNumber, inst->readPC(), inst->seqNum);
+                inst->threadNumber, inst->pcState(), inst->seqNum);
     }
     
 }
 
 void
-InOrderCPU::removeInst(DynInstPtr &inst)
+InOrderCPU::removeInst(DynInstPtr inst)
 {
-    DPRINTF(InOrderCPU, "Removing graduated instruction [tid:%i] PC %#x "
+    DPRINTF(InOrderCPU, "Removing graduated instruction [tid:%i] PC %s "
             "[sn:%lli]\n",
-            inst->threadNumber, inst->readPC(), inst->seqNum);
+            inst->threadNumber, inst->pcState(), inst->seqNum);
 
     removeInstsThisCycle = true;
 
     // Remove the instruction.
     if (!inst->isRemoveList()) {            
-        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %s "
                 "[sn:%lli] to remove list\n",
-                inst->threadNumber, inst->readPC(), inst->seqNum);
+                inst->threadNumber, inst->pcState(), inst->seqNum);
         inst->setRemoveList();        
         removeList.push(inst->getInstListIt());
     } else {
-        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %s "
                 "[sn:%lli], already on remove list\n",
-                inst->threadNumber, inst->readPC(), inst->seqNum);
+                inst->threadNumber, inst->pcState(), inst->seqNum);
     }
 
 }
@@ -1308,29 +1650,31 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
 
 
 inline void
-InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid)
+InOrderCPU::squashInstIt(const ListIt inst_it, ThreadID tid)
 {
-    if ((*instIt)->threadNumber == tid) {
+    DynInstPtr inst = (*inst_it);
+    if (inst->threadNumber == tid) {
         DPRINTF(InOrderCPU, "Squashing instruction, "
-                "[tid:%i] [sn:%lli] PC %#x\n",
-                (*instIt)->threadNumber,
-                (*instIt)->seqNum,
-                (*instIt)->readPC());
+                "[tid:%i] [sn:%lli] PC %s\n",
+                inst->threadNumber,
+                inst->seqNum,
+                inst->pcState());
 
-        (*instIt)->setSquashed();
+        inst->setSquashed();
+        archRegDepMap[tid].remove(inst);
 
-        if (!(*instIt)->isRemoveList()) {            
-            DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+        if (!inst->isRemoveList()) {
+            DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %s "
                     "[sn:%lli] to remove list\n",
-                    (*instIt)->threadNumber, (*instIt)->readPC(), 
-                    (*instIt)->seqNum);
-            (*instIt)->setRemoveList();        
-            removeList.push(instIt);
+                    inst->threadNumber, inst->pcState(),
+                    inst->seqNum);
+            inst->setRemoveList();
+            removeList.push(inst_it);
         } else {
             DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i]"
-                    " PC %#x [sn:%lli], already on remove list\n",
-                    (*instIt)->threadNumber, (*instIt)->readPC(), 
-                    (*instIt)->seqNum);
+                    " PC %s [sn:%lli], already on remove list\n",
+                    inst->threadNumber, inst->pcState(),
+                    inst->seqNum);
         }
     
     }
@@ -1343,34 +1687,26 @@ InOrderCPU::cleanUpRemovedInsts()
 {
     while (!removeList.empty()) {
         DPRINTF(InOrderCPU, "Removing instruction, "
-                "[tid:%i] [sn:%lli] PC %#x\n",
+                "[tid:%i] [sn:%lli] PC %s\n",
                 (*removeList.front())->threadNumber,
                 (*removeList.front())->seqNum,
-               (*removeList.front())->readPC());
+               (*removeList.front())->pcState());
 
         DynInstPtr inst = *removeList.front();
         ThreadID tid = inst->threadNumber;
 
-        // Make Sure Resource Schedule Is Emptied Out
-        ThePipeline::ResSchedule *inst_sched = &inst->resSched;
-        while (!inst_sched->empty()) {
-            ThePipeline::ScheduleEntry* sch_entry = inst_sched->top();
-            inst_sched->pop();
-            delete sch_entry;
-        }
-
         // Remove From Register Dependency Map, If Necessary
-        archRegDepMap[(*removeList.front())->threadNumber].
-            remove((*removeList.front()));
-
+        // archRegDepMap[tid].remove(inst);
 
         // Clear if Non-Speculative
         if (inst->staticInst &&
-              inst->seqNum == nonSpecSeqNum[tid] &&
-                nonSpecInstActive[tid] == true) {
+            inst->seqNum == nonSpecSeqNum[tid] &&
+            nonSpecInstActive[tid] == true) {
             nonSpecInstActive[tid] = false;
         }
 
+        inst->onInstList = false;
+
         instList[tid].erase(removeList.front());
 
         removeList.pop();
@@ -1379,28 +1715,6 @@ InOrderCPU::cleanUpRemovedInsts()
     removeInstsThisCycle = false;
 }
 
-void
-InOrderCPU::cleanUpRemovedReqs()
-{
-    while (!reqRemoveList.empty()) {
-        ResourceRequest *res_req = reqRemoveList.front();
-
-        DPRINTF(RefCount, "[tid:%i] [sn:%lli]: Removing Request "
-                "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n",
-                res_req->inst->threadNumber,
-                res_req->inst->seqNum,
-                res_req->getStageNum(),
-                res_req->res->name(),
-                (res_req->isCompleted()) ?
-                res_req->getComplSlot() : res_req->getSlot(),
-                res_req->isCompleted());
-
-        reqRemoveList.pop();
-
-        delete res_req;
-    }
-}
-
 void
 InOrderCPU::cleanUpRemovedEvents()
 {
@@ -1422,9 +1736,10 @@ InOrderCPU::dumpInsts()
     cprintf("Dumping Instruction List\n");
 
     while (inst_list_it != instList[0].end()) {
-        cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
+        cprintf("Instruction:%i\nPC:%s\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
                 "Squashed:%i\n\n",
-                num, (*inst_list_it)->readPC(), (*inst_list_it)->threadNumber,
+                num, (*inst_list_it)->pcState(),
+                (*inst_list_it)->threadNumber,
                 (*inst_list_it)->seqNum, (*inst_list_it)->isIssued(),
                 (*inst_list_it)->isSquashed());
         inst_list_it++;
@@ -1442,7 +1757,9 @@ InOrderCPU::wakeCPU()
 
     DPRINTF(Activity, "Waking up CPU\n");
 
-    Tick extra_cycles = tickToCycles((curTick - 1) - lastRunningCycle);
+    Tick extra_cycles = curCycle() - lastRunningCycle;
+    if (extra_cycles != 0)
+        --extra_cycles;
 
     idleCycles += extra_cycles;    
     for (int stage_num = 0; stage_num < NumStages; stage_num++) {
@@ -1451,25 +1768,40 @@ InOrderCPU::wakeCPU()
 
     numCycles += extra_cycles;
 
-    mainEventQueue.schedule(&tickEvent, nextCycle(curTick));
+    schedule(&tickEvent, clockEdge());
 }
 
-#if FULL_SYSTEM
-
+// Lots of copied full system code...place into BaseCPU class?
 void
 InOrderCPU::wakeup()
 {
-    if (this->thread[0]->status() != ThreadContext::Suspended)
+    if (thread[0]->status() != ThreadContext::Suspended)
         return;
 
-    this->wakeCPU();
+    wakeCPU();
 
     DPRINTF(Quiesce, "Suspended Processor woken\n");
-    this->threadContexts[0]->activate();
+    threadContexts[0]->activate();
+}
+
+void
+InOrderCPU::syscallContext(Fault fault, ThreadID tid, DynInstPtr inst,
+                           Cycles delay)
+{
+    // Syscall must be non-speculative, so squash from last stage
+    unsigned squash_stage = NumStages - 1;
+    inst->setSquashInfo(squash_stage);
+
+    // Squash In Pipeline Stage
+    pipelineStage[squash_stage]->setupSquash(inst, tid);
+
+    // Schedule Squash Through-out Resource Pool
+    resPool->scheduleEvent(
+        (InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst,
+        Cycles(0));
+    scheduleCpuEvent(Syscall, fault, tid, inst, delay, Syscall_Pri);
 }
-#endif
 
-#if !FULL_SYSTEM
 void
 InOrderCPU::syscall(int64_t callnum, ThreadID tid)
 {
@@ -1491,28 +1823,11 @@ InOrderCPU::syscall(int64_t callnum, ThreadID tid)
     // Clear Non-Speculative Block Variable
     nonSpecInstActive[tid] = false;
 }
-#endif
-
-void
-InOrderCPU::prefetch(DynInstPtr inst)
-{
-    Resource *mem_res = resPool->getResource(dataPortIdx);
-    return mem_res->prefetch(inst);
-}
-
-void
-InOrderCPU::writeHint(DynInstPtr inst)
-{
-    Resource *mem_res = resPool->getResource(dataPortIdx);
-    return mem_res->writeHint(inst);
-}
-
 
 TheISA::TLB*
 InOrderCPU::getITBPtr()
 {
-    CacheUnit *itb_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(fetchPortIdx));
+    CacheUnit *itb_res = resPool->getInstUnit();
     return itb_res->tlb();
 }
 
@@ -1520,140 +1835,26 @@ InOrderCPU::getITBPtr()
 TheISA::TLB*
 InOrderCPU::getDTBPtr()
 {
-    CacheUnit *dtb_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-    return dtb_res->tlb();
-}
-
-template <class T>
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
-{
-    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
-    //       you want to run w/out caches?
-    CacheUnit *cache_res = 
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-
-    return cache_res->read(inst, addr, data, flags);
-}
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-template
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, Twin32_t &data, unsigned flags);
-
-template
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, Twin64_t &data, unsigned flags);
-
-template
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, uint64_t &data, unsigned flags);
-
-template
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, uint32_t &data, unsigned flags);
-
-template
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, uint16_t &data, unsigned flags);
-
-template
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, uint8_t &data, unsigned flags);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, double &data, unsigned flags)
-{
-    return read(inst, addr, *(uint64_t*)&data, flags);
+    return resPool->getDataUnit()->tlb();
 }
 
-template<>
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, float &data, unsigned flags)
-{
-    return read(inst, addr, *(uint32_t*)&data, flags);
-}
-
-
-template<>
-Fault
-InOrderCPU::read(DynInstPtr inst, Addr addr, int32_t &data, unsigned flags)
+TheISA::Decoder *
+InOrderCPU::getDecoderPtr(unsigned tid)
 {
-    return read(inst, addr, (uint32_t&)data, flags);
+    return resPool->getInstUnit()->decoder[tid];
 }
 
-template <class T>
 Fault
-InOrderCPU::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
-                  uint64_t *write_res)
+InOrderCPU::read(DynInstPtr inst, Addr addr,
+                 uint8_t *data, unsigned size, unsigned flags)
 {
-    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
-    //       you want to run w/out caches?
-    CacheUnit *cache_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-    return cache_res->write(inst, data, addr, flags, write_res);
+    return resPool->getDataUnit()->read(inst, addr, data, size, flags);
 }
 
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-
-template
-Fault
-InOrderCPU::write(DynInstPtr inst, Twin32_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-InOrderCPU::write(DynInstPtr inst, Twin64_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-InOrderCPU::write(DynInstPtr inst, uint64_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-InOrderCPU::write(DynInstPtr inst, uint32_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-InOrderCPU::write(DynInstPtr inst, uint16_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-template
-Fault
-InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr,
-                       unsigned flags, uint64_t *res);
-
-#endif //DOXYGEN_SHOULD_SKIP_THIS
-
-template<>
-Fault
-InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, 
-                  uint64_t *res)
-{
-    return write(inst, *(uint64_t*)&data, addr, flags, res);
-}
-
-template<>
-Fault
-InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, 
-                  uint64_t *res)
-{
-    return write(inst, *(uint32_t*)&data, addr, flags, res);
-}
-
-
-template<>
 Fault
-InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, 
-                  uint64_t *res)
+InOrderCPU::write(DynInstPtr inst, uint8_t *data, unsigned size,
+                  Addr addr, unsigned flags, uint64_t *write_res)
 {
-    return write(inst, (uint32_t)data, addr, flags, res);
+    return resPool->getDataUnit()->write(inst, data, size, addr, flags,
+                                         write_res);
 }