arch, cpu: Add support for flattening misc register indexes.
[gem5.git] / src / cpu / inorder / cpu.cc
index e0e4d9a158c486b9463371e6adf22b8f61b08493..5a02f94d9bcc969b8ca0fcf80ab666860596aea8 100644 (file)
@@ -1,4 +1,17 @@
 /*
+ * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2007 MIPS Technologies, Inc.
  * All rights reserved.
  *
@@ -33,8 +46,8 @@
 
 #include "arch/utility.hh"
 #include "base/bigint.hh"
-#include "config/full_system.hh"
 #include "config/the_isa.hh"
+#include "cpu/inorder/resources/cache_unit.hh"
 #include "cpu/inorder/resources/resource_list.hh"
 #include "cpu/inorder/cpu.hh"
 #include "cpu/inorder/first_stage.hh"
 #include "cpu/activity.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
+#include "cpu/quiesce_event.hh"
+#include "cpu/reg_class.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/thread_context.hh"
 #include "debug/Activity.hh"
 #include "debug/InOrderCPU.hh"
+#include "debug/InOrderCachePort.hh"
+#include "debug/Interrupt.hh"
+#include "debug/Quiesce.hh"
 #include "debug/RefCount.hh"
 #include "debug/SkedCache.hh"
-#include "mem/translating_port.hh"
 #include "params/InOrderCPU.hh"
+#include "sim/full_system.hh"
 #include "sim/process.hh"
 #include "sim/stat_control.hh"
-
-#if FULL_SYSTEM
-#include "cpu/quiesce_event.hh"
 #include "sim/system.hh"
-#endif
 
 #if THE_ISA == ALPHA_ISA
 #include "arch/alpha/osfpal.hh"
@@ -70,6 +84,30 @@ using namespace std;
 using namespace TheISA;
 using namespace ThePipeline;
 
+/**
+ * Construct a cache port bound to the given cache unit.
+ *
+ * The port's name is the cache unit's name with the supplied suffix
+ * appended, and the unit's cpu pointer is handed to the MasterPort base.
+ *
+ * @param _cacheUnit cache unit that responses and retries are forwarded to
+ * @param name suffix appended to the cache unit's name
+ */
+InOrderCPU::CachePort::CachePort(CacheUnit *_cacheUnit,
+                                 const std::string& name) :
+    MasterPort(_cacheUnit->name() + name, _cacheUnit->cpu),
+    cacheUnit(_cacheUnit)
+{ }
+
+/**
+ * Handle a timing response arriving on this port.
+ *
+ * Packets without an error are passed to the cache unit for completion
+ * processing; error packets are only reported through the debug trace.
+ *
+ * @param pkt the response packet
+ * @return always true
+ */
+bool
+InOrderCPU::CachePort::recvTimingResp(Packet *pkt)
+{
+    if (!pkt->isError()) {
+        cacheUnit->processCacheCompletion(pkt);
+    } else {
+        DPRINTF(InOrderCachePort, "Got error packet back for address: %x\n",
+                pkt->getAddr());
+    }
+
+    return true;
+}
+
+/** Forward a retry notification to the cache unit this port was built with. */
+void
+InOrderCPU::CachePort::recvRetry()
+{
+    cacheUnit->recvRetry();
+}
+
 InOrderCPU::TickEvent::TickEvent(InOrderCPU *c)
   : Event(CPU_Tick_Pri), cpu(c)
 { }
@@ -83,16 +121,15 @@ InOrderCPU::TickEvent::process()
 
 
 const char *
-InOrderCPU::TickEvent::description()
+InOrderCPU::TickEvent::description() const
 {
     return "InOrderCPU tick event";
 }
 
 InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type,
                                Fault fault, ThreadID _tid, DynInstPtr inst,
-                               unsigned event_pri_offset)
-    : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)),
-      cpu(_cpu)
+                               CPUEventPri event_pri)
+    : Event(event_pri), cpu(_cpu)
 {
     setEvent(e_type, fault, _tid, inst);
 }
@@ -106,7 +143,7 @@ std::string InOrderCPU::eventNames[NumCPUEvents] =
     "HaltThread",
     "SuspendThread",
     "Trap",
-    "InstGraduated",
+    "Syscall",
     "SquashFromMemStall",
     "UpdatePCs"
 };
@@ -118,6 +155,7 @@ InOrderCPU::CPUEvent::process()
     {
       case ActivateThread:
         cpu->activateThread(tid);
+        cpu->resPool->activateThread(tid);
         break;
 
       case ActivateNextReadyThread:
@@ -126,24 +164,35 @@ InOrderCPU::CPUEvent::process()
 
       case DeactivateThread:
         cpu->deactivateThread(tid);
+        cpu->resPool->deactivateThread(tid);
         break;
 
       case HaltThread:
         cpu->haltThread(tid);
+        cpu->resPool->deactivateThread(tid);
         break;
 
       case SuspendThread: 
         cpu->suspendThread(tid);
+        cpu->resPool->suspendThread(tid);
         break;
 
       case SquashFromMemStall:
         cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid);
+        cpu->resPool->squashDueToMemStall(inst, inst->squashingStage,
+                                          inst->seqNum, tid);
         break;
 
       case Trap:
         DPRINTF(InOrderCPU, "Trapping CPU\n");
         cpu->trap(fault, tid, inst);
         cpu->resPool->trap(fault, tid, inst);
+        cpu->trapPending[tid] = false;
+        break;
+
+      case Syscall:
+        cpu->syscall(inst->syscallNum, tid);
+        cpu->resPool->trap(fault, tid, inst);
         break;
 
       default:
@@ -156,16 +205,16 @@ InOrderCPU::CPUEvent::process()
     
 
 const char *
-InOrderCPU::CPUEvent::description()
+InOrderCPU::CPUEvent::description() const
 {
     return "InOrderCPU event";
 }
 
 void
-InOrderCPU::CPUEvent::scheduleEvent(int delay)
+InOrderCPU::CPUEvent::scheduleEvent(Cycles delay)
 {
     assert(!scheduled() || squashed());
-    cpu->reschedule(this, cpu->nextCycle(curTick() + cpu->ticks(delay)), true);
+    cpu->reschedule(this, cpu->clockEdge(delay), true);
 }
 
 void
@@ -182,98 +231,90 @@ InOrderCPU::InOrderCPU(Params *params)
       _status(Idle),
       tickEvent(this),
       stageWidth(params->stageWidth),
+      resPool(new ResourcePool(this, params)),
+      isa(numThreads, NULL),
       timeBuffer(2 , 2),
+      dataPort(resPool->getDataUnit(), ".dcache_port"),
+      instPort(resPool->getInstUnit(), ".icache_port"),
       removeInstsThisCycle(false),
       activityRec(params->name, NumStages, 10, params->activity),
-#if FULL_SYSTEM
       system(params->system),
-      physmem(system->physmem),
-#endif // FULL_SYSTEM
 #ifdef DEBUG
       cpuEventNum(0),
       resReqCount(0),
 #endif // DEBUG
-      switchCount(0),
-      deferRegistration(false/*params->deferRegistration*/),
+      drainCount(0),
       stageTracing(params->stageTracing),
+      lastRunningCycle(0),
       instsPerSwitch(0)
 {    
-    ThreadID active_threads;
     cpu_params = params;
 
-    resPool = new ResourcePool(this, params);
-
     // Resize for Multithreading CPUs
     thread.resize(numThreads);
 
-#if FULL_SYSTEM
-    active_threads = 1;
-#else
-    active_threads = params->workload.size();
-
-    if (active_threads > MaxThreads) {
-        panic("Workload Size too large. Increase the 'MaxThreads'"
-              "in your InOrder implementation or "
-              "edit your workload size.");
-    }
+    ThreadID active_threads = params->workload.size();
+    if (FullSystem) {
+        active_threads = 1;
+    } else {
+        active_threads = params->workload.size();
 
-    
-    if (active_threads > 1) {
-        threadModel = (InOrderCPU::ThreadModel) params->threadModel;
-
-        if (threadModel == SMT) {
-            DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");            
-        } else if (threadModel == SwitchOnCacheMiss) {
-            DPRINTF(InOrderCPU, "Setting Thread Model to "
-                    "Switch On Cache Miss\n");
+        if (active_threads > MaxThreads) {
+            panic("Workload Size too large. Increase the 'MaxThreads'"
+                  "in your InOrder implementation or "
+                  "edit your workload size.");
         }
-        
-    } else {
-        threadModel = Single;
-    }
-     
-        
-    
-#endif
 
-    // Bind the fetch & data ports from the resource pool.
-    fetchPortIdx = resPool->getPortIdx(params->fetchMemPort);
-    if (fetchPortIdx == 0) {
-        fatal("Unable to find port to fetch instructions from.\n");
-    }
 
-    dataPortIdx = resPool->getPortIdx(params->dataMemPort);
-    if (dataPortIdx == 0) {
-        fatal("Unable to find port for data.\n");
+        if (active_threads > 1) {
+            threadModel = (InOrderCPU::ThreadModel) params->threadModel;
+
+            if (threadModel == SMT) {
+                DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");
+            } else if (threadModel == SwitchOnCacheMiss) {
+                DPRINTF(InOrderCPU, "Setting Thread Model to "
+                        "Switch On Cache Miss\n");
+            }
+
+        } else {
+            threadModel = Single;
+        }
     }
 
     for (ThreadID tid = 0; tid < numThreads; ++tid) {
-#if FULL_SYSTEM
-        // SMT is not supported in FS mode yet.
-        assert(numThreads == 1);
-        thread[tid] = new Thread(this, 0);
-#else
-        if (tid < (ThreadID)params->workload.size()) {
-            DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
-                    tid, params->workload[tid]->prog_fname);
-            thread[tid] =
-                new Thread(this, tid, params->workload[tid]);
+        isa[tid] = params->isa[tid];
+        pc[tid].set(0);
+        lastCommittedPC[tid].set(0);
+
+        if (FullSystem) {
+            // SMT is not supported in FS mode yet.
+            assert(numThreads == 1);
+            thread[tid] = new Thread(this, 0, NULL);
         } else {
-            //Allocate Empty thread so M5 can use later
-            //when scheduling threads to CPU
-            Process* dummy_proc = params->workload[0];
-            thread[tid] = new Thread(this, tid, dummy_proc);
+            if (tid < (ThreadID)params->workload.size()) {
+                DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
+                        tid, params->workload[tid]->progName());
+                thread[tid] =
+                    new Thread(this, tid, params->workload[tid]);
+            } else {
+                //Allocate Empty thread so M5 can use later
+                //when scheduling threads to CPU
+                Process* dummy_proc = params->workload[0];
+                thread[tid] = new Thread(this, tid, dummy_proc);
+            }
+
+            // Eventually set this with parameters...
+            asid[tid] = tid;
         }
-        
-        // Eventually set this with parameters...
-        asid[tid] = tid;
-#endif
 
         // Setup the TC that will serve as the interface to the threads/CPU.
         InOrderThreadContext *tc = new InOrderThreadContext;
         tc->cpu = this;
         tc->thread = thread[tid];
 
+        // Setup quiesce event.
+        this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc);
+
         // Give the thread the TC.
         thread[tid]->tc = tc;
         thread[tid]->setFuncExeInst(0);
@@ -320,7 +361,10 @@ InOrderCPU::InOrderCPU(Params *params)
 
         memset(intRegs[tid], 0, sizeof(intRegs[tid]));
         memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid]));
-        isa[tid].clear();
+#ifdef ISA_HAS_CC_REGS
+        memset(ccRegs[tid], 0, sizeof(ccRegs[tid]));
+#endif
+        isa[tid]->clear();
 
         // Define dummy instructions and resource requests to be used.
         dummyInst[tid] = new InOrderDynInst(this, 
@@ -330,6 +374,25 @@ InOrderCPU::InOrderCPU(Params *params)
                                             asid[tid]);
 
         dummyReq[tid] = new ResourceRequest(resPool->getResource(0));
+
+
+        if (FullSystem) {
+            // Use this dummy inst to force squashing behind every instruction
+            // in pipeline
+            dummyTrapInst[tid] = new InOrderDynInst(this, NULL, 0, 0, 0);
+            dummyTrapInst[tid]->seqNum = 0;
+            dummyTrapInst[tid]->squashSeqNum = 0;
+            dummyTrapInst[tid]->setTid(tid);
+        }
+
+        trapPending[tid] = false;
+
+    }
+
+    // InOrderCPU always requires an interrupt controller.
+    if (!params->switched_out && !interrupts) {
+        fatal("InOrderCPU %s has no interrupt controller.\n"
+              "Ensure createInterruptController() is called.\n", name());
     }
 
     dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0);
@@ -342,18 +405,15 @@ InOrderCPU::InOrderCPU(Params *params)
 
     endOfSkedIt = skedCache.end();
     frontEndSked = createFrontEndSked();
-    
-    lastRunningCycle = curTick();
+    faultSked = createFaultSked();
 
-    // Reset CPU to reset state.
-#if FULL_SYSTEM
-    Fault resetFault = new ResetFault();
-    resetFault->invoke(tcBase());
-#endif
+    lastRunningCycle = curCycle();
 
+    lockAddr = 0;
+    lockFlag = false;
     
     // Schedule First Tick Event, CPU will reschedule itself from here on out.
-    scheduleTickEvent(0);
+    scheduleTickEvent(Cycles(0));
 }
 
 InOrderCPU::~InOrderCPU()
@@ -391,8 +451,18 @@ InOrderCPU::createFrontEndSked()
     D.needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
 
 
-    DPRINTF(SkedCache, "Resource Sked created for instruction \"front_end\"\n");
+    DPRINTF(SkedCache, "Resource Sked created for instruction Front End\n");
+
+    return res_sked;
+}
 
+/**
+ * Build the fault resource schedule: a single GraduationUnit::CheckFault
+ * request on the Grad resource, scheduled in the last pipeline stage
+ * (NumStages - 1).
+ *
+ * @return the newly allocated resource schedule
+ */
+RSkedPtr
+InOrderCPU::createFaultSked()
+{
+    RSkedPtr res_sked = new ResourceSked();
+    StageScheduler W(res_sked, NumStages - 1);
+    W.needs(Grad, GraduationUnit::CheckFault);
+    DPRINTF(SkedCache, "Resource Sked created for instruction Faults\n");
     return res_sked;
 }
 
@@ -501,12 +571,17 @@ InOrderCPU::createBackEndSked(DynInstPtr inst)
         }
     }
 
-    W.needs(Grad, GraduationUnit::GraduateInst);
+    W.needs(Grad, GraduationUnit::CheckFault);
 
     for (int idx=0; idx < inst->numDestRegs(); idx++) {
         W.needs(RegManager, UseDefUnit::WriteDestReg, idx);
     }
 
+    if (inst->isControl())
+        W.needs(BPred, BranchPredictor::UpdatePredictor);
+
+    W.needs(Grad, GraduationUnit::GraduateInst);
+
     // Insert Back Schedule into our cache of
     // resource schedules
     addToSkedCache(inst, res_sked);
@@ -599,16 +674,21 @@ InOrderCPU::regStats()
     committedInsts
         .init(numThreads)
         .name(name() + ".committedInsts")
-        .desc("Number of Instructions Simulated (Per-Thread)");
+        .desc("Number of Instructions committed (Per-Thread)");
+
+    committedOps
+        .init(numThreads)
+        .name(name() + ".committedOps")
+        .desc("Number of Ops committed (Per-Thread)");
 
     smtCommittedInsts
         .init(numThreads)
         .name(name() + ".smtCommittedInsts")
-        .desc("Number of SMT Instructions Simulated (Per-Thread)");
+        .desc("Number of SMT Instructions committed (Per-Thread)");
 
     totalCommittedInsts
         .name(name() + ".committedInsts_total")
-        .desc("Number of Instructions Simulated (Total)");
+        .desc("Number of Instructions committed (Total)");
 
     cpi
         .name(name() + ".cpi")
@@ -657,8 +737,9 @@ InOrderCPU::tick()
 
     ++numCycles;
 
+    checkForInterrupts();
+
     bool pipes_idle = true;
-    
     //Tick each of the stages
     for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
         pipelineStage[stNum]->tick();
@@ -686,17 +767,17 @@ InOrderCPU::tick()
     if (!tickEvent.scheduled()) {
         if (_status == SwitchedOut) {
             // increment stat
-            lastRunningCycle = curTick();
+            lastRunningCycle = curCycle();
         } else if (!activityRec.active()) {
             DPRINTF(InOrderCPU, "sleeping CPU.\n");
-            lastRunningCycle = curTick();
+            lastRunningCycle = curCycle();
             timesIdled++;
         } else {
             //Tick next_tick = curTick() + cycles(1);
             //tickEvent.schedule(next_tick);
-            schedule(&tickEvent, nextCycle(curTick() + 1));
+            schedule(&tickEvent, clockEdge(Cycles(1)));
             DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", 
-                    nextCycle(curTick() + 1));
+                    clockEdge(Cycles(1)));
         }
     }
 
@@ -708,41 +789,50 @@ InOrderCPU::tick()
 void
 InOrderCPU::init()
 {
-    if (!deferRegistration) {
-        registerThreadContexts();
-    }
+    BaseCPU::init();
 
-    // Set inSyscall so that the CPU doesn't squash when initially
-    // setting up registers.
-    for (ThreadID tid = 0; tid < numThreads; ++tid)
-        thread[tid]->inSyscall = true;
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        // Set noSquashFromTC so that the CPU doesn't squash when initially
+        // setting up registers.
+        thread[tid]->noSquashFromTC = true;
+        // Initialise the ThreadContext's memory proxies
+        thread[tid]->initMemProxies(thread[tid]->getTC());
+    }
 
-#if FULL_SYSTEM
-    for (ThreadID tid = 0; tid < numThreads; tid++) {
-        ThreadContext *src_tc = threadContexts[tid];
-        TheISA::initCPU(src_tc, src_tc->contextId());
+    if (FullSystem && !params()->switched_out) {
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            ThreadContext *src_tc = threadContexts[tid];
+            TheISA::initCPU(src_tc, src_tc->contextId());
+        }
     }
-#endif
 
-    // Clear inSyscall.
+    // Clear noSquashFromTC.
     for (ThreadID tid = 0; tid < numThreads; ++tid)
-        thread[tid]->inSyscall = false;
+        thread[tid]->noSquashFromTC = false;
 
     // Call Initializiation Routine for Resource Pool
     resPool->init();
 }
 
-Port*
-InOrderCPU::getPort(const std::string &if_name, int idx)
+void
+InOrderCPU::verifyMemoryMode() const
 {
-    return resPool->getPort(if_name, idx);
+    if (!system->isTimingMode()) {
+        fatal("The in-order CPU requires the memory system to be in "
+              "'timing' mode.\n");
+    }
 }
 
-#if FULL_SYSTEM
 Fault
 InOrderCPU::hwrei(ThreadID tid)
 {
-    panic("hwrei: Unimplemented");
+#if THE_ISA == ALPHA_ISA
+    // Need to clear the lock flag upon returning from an interrupt.
+    setMiscRegNoEffect(AlphaISA::MISCREG_LOCKFLAG, false, tid);
+
+    thread[tid]->kernelStats->hwrei();
+    // FIXME: XXX check for interrupts? XXX
+#endif
     
     return NoFault;
 }
@@ -751,11 +841,65 @@ InOrderCPU::hwrei(ThreadID tid)
 bool
 InOrderCPU::simPalCheck(int palFunc, ThreadID tid)
 {
-    panic("simPalCheck: Unimplemented");
+#if THE_ISA == ALPHA_ISA
+    if (this->thread[tid]->kernelStats)
+        this->thread[tid]->kernelStats->callpal(palFunc,
+                                                this->threadContexts[tid]);
+
+    switch (palFunc) {
+      case PAL::halt:
+        halt();
+        if (--System::numSystemsRunning == 0)
+            exitSimLoop("all cpus halted");
+        break;
 
+      case PAL::bpt:
+      case PAL::bugchk:
+        if (this->system->breakpoint())
+            return false;
+        break;
+    }
+#endif
     return true;
 }
 
+void
+InOrderCPU::checkForInterrupts()
+{
+    for (int i = 0; i < threadContexts.size(); i++) {
+        ThreadContext *tc = threadContexts[i];
+
+        if (interrupts->checkInterrupts(tc)) {
+            Fault interrupt = interrupts->getInterrupt(tc);
+
+            if (interrupt != NoFault) {
+                DPRINTF(Interrupt, "Processing Intterupt for [tid:%i].\n",
+                        tc->threadId());
+
+                ThreadID tid = tc->threadId();
+                interrupts->updateIntrInfo(tc);
+
+                // Squash from Last Stage in Pipeline
+                unsigned last_stage = NumStages - 1;
+                dummyTrapInst[tid]->squashingStage = last_stage;
+                pipelineStage[last_stage]->setupSquash(dummyTrapInst[tid],
+                                                       tid);
+
+                // By default, setupSquash will always squash from stage + 1
+                pipelineStage[BackEndStartStage - 1]->setupSquash(dummyTrapInst[tid],
+                                                                  tid);
+
+                // Schedule Squash Through-out Resource Pool
+                resPool->scheduleEvent(
+                    (InOrderCPU::CPUEventType)ResourcePool::SquashAll,
+                    dummyTrapInst[tid], Cycles(0));
+
+                // Finally, Setup Trap to happen at end of cycle
+                trapContext(interrupt, tid, dummyTrapInst[tid]);
+            }
+        }
+    }
+}
 
 Fault
 InOrderCPU::getInterrupts()
@@ -764,7 +908,6 @@ InOrderCPU::getInterrupts()
     return interrupts->getInterrupt(threadContexts[0]);
 }
 
-
 void
 InOrderCPU::processInterrupts(Fault interrupt)
 {
@@ -783,32 +926,24 @@ InOrderCPU::processInterrupts(Fault interrupt)
     trap(interrupt, threadContexts[0]->contextId(), dummyBufferInst);
 }
 
-
-void
-InOrderCPU::updateMemPorts()
-{
-    // Update all ThreadContext's memory ports (Functional/Virtual
-    // Ports)
-    ThreadID size = thread.size();
-    for (ThreadID i = 0; i < size; ++i)
-        thread[i]->connectMemPorts(thread[i]->getTC());
-}
-#endif
-
 void
-InOrderCPU::trapContext(Fault fault, ThreadID tid, DynInstPtr inst, int delay)
+InOrderCPU::trapContext(Fault fault, ThreadID tid, DynInstPtr inst,
+                        Cycles delay)
 {
     scheduleCpuEvent(Trap, fault, tid, inst, delay);
+    trapPending[tid] = true;
 }
 
 void
 InOrderCPU::trap(Fault fault, ThreadID tid, DynInstPtr inst)
 {
     fault->invoke(tcBase(tid), inst->staticInst);
+    removePipelineStalls(tid);
 }
 
 void 
-InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay)
+InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid,
+                               Cycles delay)
 {
     scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay);
 }
@@ -836,25 +971,21 @@ InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num,
 void
 InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
                              ThreadID tid, DynInstPtr inst, 
-                             unsigned delay, unsigned event_pri_offset)
+                             Cycles delay, CPUEventPri event_pri)
 {
     CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst,
-                                       event_pri_offset);
+                                       event_pri);
 
-    Tick sked_tick = nextCycle(curTick() + ticks(delay));
-    if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
-                eventNames[c_event], curTick() + delay, tid);
-        schedule(cpu_event, sked_tick);
-    } else {
-        cpu_event->process();
-        cpuEventRemoveList.push(cpu_event);
-    }
+    Tick sked_tick = clockEdge(delay);
+    DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
+            eventNames[c_event], curTick() + delay, tid);
+    schedule(cpu_event, sked_tick);
 
     // Broadcast event to the Resource Pool
     // Need to reset tid just in case this is a dummy instruction
     inst->setTid(tid);        
-    resPool->scheduleEvent(c_event, inst, 0, 0, tid);
+    // @todo: Is this really right? Should the delay not be passed on?
+    resPool->scheduleEvent(c_event, inst, Cycles(0), 0, tid);
 }
 
 bool
@@ -894,7 +1025,7 @@ InOrderCPU::activateNextReadyThread()
         activateThread(ready_tid);                        
         
         // Activate in Resource Pool
-        resPool->activateAll(ready_tid);
+        resPool->activateThread(ready_tid);
         
         list<ThreadID>::iterator ready_it =
             std::find(readyThreads.begin(), readyThreads.end(), ready_tid);
@@ -958,7 +1089,7 @@ InOrderCPU::activateThreadInPipeline(ThreadID tid)
 }
 
 void
-InOrderCPU::deactivateContext(ThreadID tid, int delay)
+InOrderCPU::deactivateContext(ThreadID tid, Cycles delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid);
 
@@ -1014,7 +1145,6 @@ InOrderCPU::updateThreadPriority()
         //DEFAULT TO ROUND ROBIN SCHEME
         //e.g. Move highest priority to end of thread list
         list<ThreadID>::iterator list_begin = activeThreads.begin();
-        list<ThreadID>::iterator list_end   = activeThreads.end();
 
         unsigned high_thread = *list_begin;
 
@@ -1041,7 +1171,7 @@ InOrderCPU::tickThreadStats()
 }
 
 void
-InOrderCPU::activateContext(ThreadID tid, int delay)
+InOrderCPU::activateContext(ThreadID tid, Cycles delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid);
 
@@ -1056,15 +1186,12 @@ InOrderCPU::activateContext(ThreadID tid, int delay)
 }
 
 void
-InOrderCPU::activateNextReadyContext(int delay)
+InOrderCPU::activateNextReadyContext(Cycles delay)
 {
     DPRINTF(InOrderCPU,"Activating next ready thread\n");
 
-    // NOTE: Add 5 to the event priority so that we always activate
-    // threads after we've finished deactivating, squashing,etc.
-    // other threads
     scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], 
-                     delay, 5);
+                     delay, ActivateNextReadyThread_Pri);
 
     // Be sure to signal that there's some activity so the CPU doesn't
     // deschedule itself.
@@ -1074,11 +1201,11 @@ InOrderCPU::activateNextReadyContext(int delay)
 }
 
 void
-InOrderCPU::haltContext(ThreadID tid, int delay)
+InOrderCPU::haltContext(ThreadID tid)
 {
     DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid);
 
-    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid]);
 
     activityRec.activity();
 }
@@ -1099,9 +1226,9 @@ InOrderCPU::haltThread(ThreadID tid)
 }
 
 void
-InOrderCPU::suspendContext(ThreadID tid, int delay)
+InOrderCPU::suspendContext(ThreadID tid)
 {
-    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid]);
 }
 
 void
@@ -1131,19 +1258,26 @@ InOrderCPU::getPipeStage(int stage_num)
     return pipelineStage[stage_num];
 }
 
+
 RegIndex
-InOrderCPU::flattenRegIdx(RegIndex reg_idx, RegType &reg_type, ThreadID tid)
-{
-    if (reg_idx < FP_Base_DepTag) {
-        reg_type = IntType;
-        return isa[tid].flattenIntIndex(reg_idx);
-    } else if (reg_idx < Ctrl_Base_DepTag) {
-        reg_type = FloatType;
-        reg_idx -= FP_Base_DepTag;
-        return isa[tid].flattenFloatIndex(reg_idx);
-    } else {
-        reg_type = MiscType;
-        return reg_idx - TheISA::Ctrl_Base_DepTag;
+InOrderCPU::flattenRegIdx(RegIndex reg_idx, RegClass &reg_type, ThreadID tid)
+{
+    RegIndex rel_idx;
+
+    reg_type = regIdxToClass(reg_idx, &rel_idx);
+
+    switch (reg_type) {
+      case IntRegClass:
+        return isa[tid]->flattenIntIndex(rel_idx);
+
+      case FloatRegClass:
+        return isa[tid]->flattenFloatIndex(rel_idx);
+
+      case MiscRegClass:
+        return rel_idx;
+
+      default:
+        panic("register %d out of range\n", reg_idx);
     }
 }
 
@@ -1159,15 +1293,34 @@ InOrderCPU::readIntReg(RegIndex reg_idx, ThreadID tid)
 FloatReg
 InOrderCPU::readFloatReg(RegIndex reg_idx, ThreadID tid)
 {
+    DPRINTF(FloatRegs, "[tid:%i]: Reading Float Reg %i as %x, %08f\n",
+            tid, reg_idx, floatRegs.i[tid][reg_idx], floatRegs.f[tid][reg_idx]);
+
     return floatRegs.f[tid][reg_idx];
 }
 
 FloatRegBits
 InOrderCPU::readFloatRegBits(RegIndex reg_idx, ThreadID tid)
-{;
+{
+    DPRINTF(FloatRegs, "[tid:%i]: Reading Float Reg %i as %x, %08f\n",
+            tid, reg_idx, floatRegs.i[tid][reg_idx], floatRegs.f[tid][reg_idx]);
+
     return floatRegs.i[tid][reg_idx];
 }
 
+/**
+ * Read a condition-code register of the given thread.
+ *
+ * Panics when the ISA was built without condition-code registers
+ * (ISA_HAS_CC_REGS undefined).
+ *
+ * @param reg_idx index into the thread's CC register file
+ * @param tid thread whose register is read
+ * @return the register's current value
+ */
+CCReg
+InOrderCPU::readCCReg(RegIndex reg_idx, ThreadID tid)
+{
+#ifndef ISA_HAS_CC_REGS
+    panic("readCCReg: ISA does not have CC regs\n");
+#else
+    const CCReg cc_val = ccRegs[tid][reg_idx];
+
+    DPRINTF(CCRegs, "[tid:%i]: Reading CC. Reg %i as %x\n",
+            tid, reg_idx, cc_val);
+
+    return cc_val;
+#endif
+}
+
 void
 InOrderCPU::setIntReg(RegIndex reg_idx, uint64_t val, ThreadID tid)
 {
@@ -1188,6 +1341,11 @@ void
 InOrderCPU::setFloatReg(RegIndex reg_idx, FloatReg val, ThreadID tid)
 {
     floatRegs.f[tid][reg_idx] = val;
+    DPRINTF(FloatRegs, "[tid:%i]: Setting Float. Reg %i bits to "
+            "%x, %08f\n",
+            tid, reg_idx,
+            floatRegs.i[tid][reg_idx],
+            floatRegs.f[tid][reg_idx]);
 }
 
 
@@ -1195,6 +1353,23 @@ void
 InOrderCPU::setFloatRegBits(RegIndex reg_idx, FloatRegBits val, ThreadID tid)
 {
     floatRegs.i[tid][reg_idx] = val;
+    DPRINTF(FloatRegs, "[tid:%i]: Setting Float. Reg %i bits to "
+            "%x, %08f\n",
+            tid, reg_idx,
+            floatRegs.i[tid][reg_idx],
+            floatRegs.f[tid][reg_idx]);
+}
+
+/**
+ * Write a condition-code register of the given thread.
+ *
+ * Panics when the ISA was built without condition-code registers
+ * (ISA_HAS_CC_REGS undefined).
+ *
+ * @param reg_idx index into the thread's CC register file
+ * @param val value to store
+ * @param tid thread whose register is written
+ */
+void
+InOrderCPU::setCCReg(RegIndex reg_idx, CCReg val, ThreadID tid)
+{
+#ifdef ISA_HAS_CC_REGS
+    DPRINTF(CCRegs, "[tid:%i]: Setting CC. Reg %i to %x\n",
+            tid, reg_idx, val);
+    ccRegs[tid][reg_idx] = val;
+#else
+    // Name this function (not readCCReg) so the failure is attributed
+    // to the write path.
+    panic("setCCReg: ISA does not have CC regs\n");
+#endif
+}
 
 uint64_t
@@ -1205,18 +1380,25 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid)
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {                   
+    RegIndex rel_idx;
+
+    switch (regIdxToClass(reg_idx, &rel_idx)) {
+      case IntRegClass:
         // Integer Register File
-        return readIntReg(reg_idx, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {          
+        return readIntReg(rel_idx, tid);
+
+      case FloatRegClass:
         // Float Register File
-        reg_idx -= FP_Base_DepTag;
-        return readFloatRegBits(reg_idx, tid);
-    } else {
-        reg_idx -= Ctrl_Base_DepTag;
-        return readMiscReg(reg_idx, tid);  // Misc. Register File
+        return readFloatRegBits(rel_idx, tid);
+
+      case MiscRegClass:
+        return readMiscReg(rel_idx, tid);  // Misc. Register File
+
+      default:
+        panic("register %d out of range\n", reg_idx);
     }
 }
+
 void
 InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
                               ThreadID tid)
@@ -1226,39 +1408,49 @@ InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {            // Integer Register File
-        setIntReg(reg_idx, val, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {   // Float Register File
-        reg_idx -= FP_Base_DepTag;
-        setFloatRegBits(reg_idx, val, tid);
-    } else {
-        reg_idx -= Ctrl_Base_DepTag;
-        setMiscReg(reg_idx, val, tid); // Misc. Register File
+    RegIndex rel_idx;
+
+    switch (regIdxToClass(reg_idx, &rel_idx)) {
+      case IntRegClass:
+        setIntReg(rel_idx, val, tid);
+        break;
+
+      case FloatRegClass:
+        setFloatRegBits(rel_idx, val, tid);
+        break;
+
+      case CCRegClass:
+        setCCReg(rel_idx, val, tid);
+        break;
+
+      case MiscRegClass:
+        setMiscReg(rel_idx, val, tid); // Misc. Register File
+        break;
     }
 }
 
 MiscReg
 InOrderCPU::readMiscRegNoEffect(int misc_reg, ThreadID tid)
 {
-    return isa[tid].readMiscRegNoEffect(misc_reg);
+    return isa[tid]->readMiscRegNoEffect(misc_reg);
 }
 
 MiscReg
 InOrderCPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    return isa[tid].readMiscReg(misc_reg, tcBase(tid));
+    return isa[tid]->readMiscReg(misc_reg, tcBase(tid));
 }
 
 void
 InOrderCPU::setMiscRegNoEffect(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    isa[tid].setMiscRegNoEffect(misc_reg, val);
+    isa[tid]->setMiscRegNoEffect(misc_reg, val);
 }
 
 void
 InOrderCPU::setMiscReg(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    isa[tid].setMiscReg(misc_reg, val, tcBase(tid));
+    isa[tid]->setMiscReg(misc_reg, val, tcBase(tid));
 }
 
 
@@ -1338,19 +1530,26 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
     
     // Increment thread-state's instruction count
     thread[tid]->numInst++;
+    thread[tid]->numOp++;
 
     // Increment thread-state's instruction stats
     thread[tid]->numInsts++;
+    thread[tid]->numOps++;
 
     // Count committed insts per thread stats
-    committedInsts[tid]++;
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        committedInsts[tid]++;
 
-    // Count total insts committed stat
-    totalCommittedInsts++;
+        // Count total insts committed stat
+        totalCommittedInsts++;
+    }
+
+    committedOps[tid]++;
 
     // Count SMT-committed insts per thread stat
     if (numActiveThreads() > 1) {
-        smtCommittedInsts[tid]++;
+        if (!inst->isMicroop() || inst->isLastMicroop())
+            smtCommittedInsts[tid]++;
     }
 
     // Instruction-Mix Stats
@@ -1371,12 +1570,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
     }
 
     // Check for instruction-count-based events.
-    comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
-
-    // Broadcast to other resources an instruction
-    // has been completed
-    resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, 
-                           0, 0, tid);
+    comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp);
 
     // Finally, remove instruction from CPU
     removeInst(inst);
@@ -1456,29 +1650,31 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
 
 
 inline void
-InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid)
+InOrderCPU::squashInstIt(const ListIt inst_it, ThreadID tid)
 {
-    if ((*instIt)->threadNumber == tid) {
+    DynInstPtr inst = (*inst_it);
+    if (inst->threadNumber == tid) {
         DPRINTF(InOrderCPU, "Squashing instruction, "
                 "[tid:%i] [sn:%lli] PC %s\n",
-                (*instIt)->threadNumber,
-                (*instIt)->seqNum,
-                (*instIt)->pcState());
+                inst->threadNumber,
+                inst->seqNum,
+                inst->pcState());
 
-        (*instIt)->setSquashed();
+        inst->setSquashed();
+        archRegDepMap[tid].remove(inst);
 
-        if (!(*instIt)->isRemoveList()) {            
+        if (!inst->isRemoveList()) {
             DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %s "
                     "[sn:%lli] to remove list\n",
-                    (*instIt)->threadNumber, (*instIt)->pcState(),
-                    (*instIt)->seqNum);
-            (*instIt)->setRemoveList();        
-            removeList.push(instIt);
+                    inst->threadNumber, inst->pcState(),
+                    inst->seqNum);
+            inst->setRemoveList();
+            removeList.push(inst_it);
         } else {
             DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i]"
                     " PC %s [sn:%lli], already on remove list\n",
-                    (*instIt)->threadNumber, (*instIt)->pcState(),
-                    (*instIt)->seqNum);
+                    inst->threadNumber, inst->pcState(),
+                    inst->seqNum);
         }
     
     }
@@ -1500,7 +1696,7 @@ InOrderCPU::cleanUpRemovedInsts()
         ThreadID tid = inst->threadNumber;
 
         // Remove From Register Dependency Map, If Necessary
-        archRegDepMap[tid].remove(inst);
+        // archRegDepMap[tid].remove(inst);
 
         // Clear if Non-Speculative
         if (inst->staticInst &&
@@ -1561,7 +1757,9 @@ InOrderCPU::wakeCPU()
 
     DPRINTF(Activity, "Waking up CPU\n");
 
-    Tick extra_cycles = tickToCycles((curTick() - 1) - lastRunningCycle);
+    Tick extra_cycles = curCycle() - lastRunningCycle;
+    if (extra_cycles != 0)
+        --extra_cycles;
 
     idleCycles += extra_cycles;    
     for (int stage_num = 0; stage_num < NumStages; stage_num++) {
@@ -1570,11 +1768,10 @@ InOrderCPU::wakeCPU()
 
     numCycles += extra_cycles;
 
-    schedule(&tickEvent, nextCycle(curTick()));
+    schedule(&tickEvent, clockEdge());
 }
 
-#if FULL_SYSTEM
-
+// Lots of copied full system code...place into BaseCPU class?
 void
 InOrderCPU::wakeup()
 {
@@ -1586,9 +1783,25 @@ InOrderCPU::wakeup()
     DPRINTF(Quiesce, "Suspended Processor woken\n");
     threadContexts[0]->activate();
 }
-#endif
 
-#if !FULL_SYSTEM
+void
+InOrderCPU::syscallContext(Fault fault, ThreadID tid, DynInstPtr inst,
+                           Cycles delay)
+{
+    // Syscall must be non-speculative, so squash from last stage
+    unsigned squash_stage = NumStages - 1;
+    inst->setSquashInfo(squash_stage);
+
+    // Squash In Pipeline Stage
+    pipelineStage[squash_stage]->setupSquash(inst, tid);
+
+    // Schedule Squash Through-out Resource Pool
+    resPool->scheduleEvent(
+        (InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst,
+        Cycles(0));
+    scheduleCpuEvent(Syscall, fault, tid, inst, delay, Syscall_Pri);
+}
+
 void
 InOrderCPU::syscall(int64_t callnum, ThreadID tid)
 {
@@ -1610,13 +1823,11 @@ InOrderCPU::syscall(int64_t callnum, ThreadID tid)
     // Clear Non-Speculative Block Variable
     nonSpecInstActive[tid] = false;
 }
-#endif
 
 TheISA::TLB*
 InOrderCPU::getITBPtr()
 {
-    CacheUnit *itb_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(fetchPortIdx));
+    CacheUnit *itb_res = resPool->getInstUnit();
     return itb_res->tlb();
 }
 
@@ -1624,30 +1835,26 @@ InOrderCPU::getITBPtr()
 TheISA::TLB*
 InOrderCPU::getDTBPtr()
 {
-    CacheUnit *dtb_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-    return dtb_res->tlb();
+    return resPool->getDataUnit()->tlb();
+}
+
+TheISA::Decoder *
+InOrderCPU::getDecoderPtr(unsigned tid)
+{
+    return resPool->getInstUnit()->decoder[tid];
 }
 
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr,
                  uint8_t *data, unsigned size, unsigned flags)
 {
-    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
-    //       you want to run w/out caches?
-    CacheUnit *cache_res = 
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-
-    return cache_res->read(inst, addr, data, size, flags);
+    return resPool->getDataUnit()->read(inst, addr, data, size, flags);
 }
 
 Fault
 InOrderCPU::write(DynInstPtr inst, uint8_t *data, unsigned size,
                   Addr addr, unsigned flags, uint64_t *write_res)
 {
-    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
-    //       you want to run w/out caches?
-    CacheUnit *cache_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-    return cache_res->write(inst, data, size, addr, flags, write_res);
+    return resPool->getDataUnit()->write(inst, data, size, addr, flags,
+                                         write_res);
 }