arch, cpu: Add support for flattening misc register indexes.
[gem5.git] / src / cpu / inorder / cpu.cc
index 365b2f18a190f983149412c349f2d597bcc87898..5a02f94d9bcc969b8ca0fcf80ab666860596aea8 100644 (file)
@@ -1,4 +1,17 @@
 /*
+ * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2007 MIPS Technologies, Inc.
  * All rights reserved.
  *
@@ -33,8 +46,8 @@
 
 #include "arch/utility.hh"
 #include "base/bigint.hh"
-#include "config/full_system.hh"
 #include "config/the_isa.hh"
+#include "cpu/inorder/resources/cache_unit.hh"
 #include "cpu/inorder/resources/resource_list.hh"
 #include "cpu/inorder/cpu.hh"
 #include "cpu/inorder/first_stage.hh"
 #include "cpu/activity.hh"
 #include "cpu/base.hh"
 #include "cpu/exetrace.hh"
+#include "cpu/quiesce_event.hh"
+#include "cpu/reg_class.hh"
 #include "cpu/simple_thread.hh"
 #include "cpu/thread_context.hh"
 #include "debug/Activity.hh"
 #include "debug/InOrderCPU.hh"
+#include "debug/InOrderCachePort.hh"
+#include "debug/Interrupt.hh"
+#include "debug/Quiesce.hh"
 #include "debug/RefCount.hh"
 #include "debug/SkedCache.hh"
-#include "mem/translating_port.hh"
 #include "params/InOrderCPU.hh"
+#include "sim/full_system.hh"
 #include "sim/process.hh"
 #include "sim/stat_control.hh"
-
-#if FULL_SYSTEM
-#include "cpu/quiesce_event.hh"
 #include "sim/system.hh"
-#endif
 
 #if THE_ISA == ALPHA_ISA
 #include "arch/alpha/osfpal.hh"
@@ -70,6 +84,30 @@ using namespace std;
 using namespace TheISA;
 using namespace ThePipeline;
 
+InOrderCPU::CachePort::CachePort(CacheUnit *_cacheUnit,
+                                 const std::string& name) :
+    MasterPort(_cacheUnit->name() + name, _cacheUnit->cpu),
+    cacheUnit(_cacheUnit)
+{ }
+
+bool
+InOrderCPU::CachePort::recvTimingResp(Packet *pkt)
+{
+    if (pkt->isError())
+        DPRINTF(InOrderCachePort, "Got error packet back for address: %x\n",
+                pkt->getAddr());
+    else
+        cacheUnit->processCacheCompletion(pkt);
+
+    return true;
+}
+
+void
+InOrderCPU::CachePort::recvRetry()
+{
+    cacheUnit->recvRetry();
+}
+
 InOrderCPU::TickEvent::TickEvent(InOrderCPU *c)
   : Event(CPU_Tick_Pri), cpu(c)
 { }
@@ -83,7 +121,7 @@ InOrderCPU::TickEvent::process()
 
 
 const char *
-InOrderCPU::TickEvent::description()
+InOrderCPU::TickEvent::description() const
 {
     return "InOrderCPU tick event";
 }
@@ -149,14 +187,14 @@ InOrderCPU::CPUEvent::process()
         DPRINTF(InOrderCPU, "Trapping CPU\n");
         cpu->trap(fault, tid, inst);
         cpu->resPool->trap(fault, tid, inst);
+        cpu->trapPending[tid] = false;
         break;
 
-#if !FULL_SYSTEM
       case Syscall:
         cpu->syscall(inst->syscallNum, tid);
         cpu->resPool->trap(fault, tid, inst);
         break;
-#endif
+
       default:
         fatal("Unrecognized Event Type %s", eventNames[cpuEventType]);    
     }
@@ -167,16 +205,16 @@ InOrderCPU::CPUEvent::process()
     
 
 const char *
-InOrderCPU::CPUEvent::description()
+InOrderCPU::CPUEvent::description() const
 {
     return "InOrderCPU event";
 }
 
 void
-InOrderCPU::CPUEvent::scheduleEvent(int delay)
+InOrderCPU::CPUEvent::scheduleEvent(Cycles delay)
 {
     assert(!scheduled() || squashed());
-    cpu->reschedule(this, cpu->nextCycle(curTick() + cpu->ticks(delay)), true);
+    cpu->reschedule(this, cpu->clockEdge(delay), true);
 }
 
 void
@@ -193,105 +231,89 @@ InOrderCPU::InOrderCPU(Params *params)
       _status(Idle),
       tickEvent(this),
       stageWidth(params->stageWidth),
+      resPool(new ResourcePool(this, params)),
+      isa(numThreads, NULL),
       timeBuffer(2 , 2),
+      dataPort(resPool->getDataUnit(), ".dcache_port"),
+      instPort(resPool->getInstUnit(), ".icache_port"),
       removeInstsThisCycle(false),
       activityRec(params->name, NumStages, 10, params->activity),
-#if FULL_SYSTEM
       system(params->system),
-#endif // FULL_SYSTEM
 #ifdef DEBUG
       cpuEventNum(0),
       resReqCount(0),
 #endif // DEBUG
       drainCount(0),
-      deferRegistration(false/*params->deferRegistration*/),
       stageTracing(params->stageTracing),
       lastRunningCycle(0),
       instsPerSwitch(0)
 {    
-    ThreadID active_threads;
     cpu_params = params;
 
-    resPool = new ResourcePool(this, params);
-
     // Resize for Multithreading CPUs
     thread.resize(numThreads);
 
-#if FULL_SYSTEM
-    active_threads = 1;
-#else
-    active_threads = params->workload.size();
-
-    if (active_threads > MaxThreads) {
-        panic("Workload Size too large. Increase the 'MaxThreads'"
-              "in your InOrder implementation or "
-              "edit your workload size.");
-    }
+    ThreadID active_threads = params->workload.size();
+    if (FullSystem) {
+        active_threads = 1;
+    } else {
+        active_threads = params->workload.size();
 
-    
-    if (active_threads > 1) {
-        threadModel = (InOrderCPU::ThreadModel) params->threadModel;
-
-        if (threadModel == SMT) {
-            DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");            
-        } else if (threadModel == SwitchOnCacheMiss) {
-            DPRINTF(InOrderCPU, "Setting Thread Model to "
-                    "Switch On Cache Miss\n");
+        if (active_threads > MaxThreads) {
+            panic("Workload Size too large. Increase the 'MaxThreads'"
+                  "in your InOrder implementation or "
+                  "edit your workload size.");
         }
-        
-    } else {
-        threadModel = Single;
-    }
-     
-        
-    
-#endif
 
-    // Bind the fetch & data ports from the resource pool.
-    fetchPortIdx = resPool->getPortIdx(params->fetchMemPort);
-    if (fetchPortIdx == 0) {
-        fatal("Unable to find port to fetch instructions from.\n");
-    }
 
-    dataPortIdx = resPool->getPortIdx(params->dataMemPort);
-    if (dataPortIdx == 0) {
-        fatal("Unable to find port for data.\n");
+        if (active_threads > 1) {
+            threadModel = (InOrderCPU::ThreadModel) params->threadModel;
+
+            if (threadModel == SMT) {
+                DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");
+            } else if (threadModel == SwitchOnCacheMiss) {
+                DPRINTF(InOrderCPU, "Setting Thread Model to "
+                        "Switch On Cache Miss\n");
+            }
+
+        } else {
+            threadModel = Single;
+        }
     }
 
     for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        isa[tid] = params->isa[tid];
         pc[tid].set(0);
         lastCommittedPC[tid].set(0);
 
-#if FULL_SYSTEM
-        // SMT is not supported in FS mode yet.
-        assert(numThreads == 1);
-        thread[tid] = new Thread(this, 0);
-#else
-        if (tid < (ThreadID)params->workload.size()) {
-            DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
-                    tid, params->workload[tid]->prog_fname);
-            thread[tid] =
-                new Thread(this, tid, params->workload[tid]);
+        if (FullSystem) {
+            // SMT is not supported in FS mode yet.
+            assert(numThreads == 1);
+            thread[tid] = new Thread(this, 0, NULL);
         } else {
-            //Allocate Empty thread so M5 can use later
-            //when scheduling threads to CPU
-            Process* dummy_proc = params->workload[0];
-            thread[tid] = new Thread(this, tid, dummy_proc);
+            if (tid < (ThreadID)params->workload.size()) {
+                DPRINTF(InOrderCPU, "Workload[%i] process is %#x\n",
+                        tid, params->workload[tid]->progName());
+                thread[tid] =
+                    new Thread(this, tid, params->workload[tid]);
+            } else {
+                //Allocate Empty thread so M5 can use later
+                //when scheduling threads to CPU
+                Process* dummy_proc = params->workload[0];
+                thread[tid] = new Thread(this, tid, dummy_proc);
+            }
+
+            // Eventually set this with parameters...
+            asid[tid] = tid;
         }
-        
-        // Eventually set this with parameters...
-        asid[tid] = tid;
-#endif
 
         // Setup the TC that will serve as the interface to the threads/CPU.
         InOrderThreadContext *tc = new InOrderThreadContext;
         tc->cpu = this;
         tc->thread = thread[tid];
 
-#if FULL_SYSTEM
         // Setup quiesce event.
         this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc);
-#endif
 
         // Give the thread the TC.
         thread[tid]->tc = tc;
@@ -339,7 +361,10 @@ InOrderCPU::InOrderCPU(Params *params)
 
         memset(intRegs[tid], 0, sizeof(intRegs[tid]));
         memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid]));
-        isa[tid].clear();
+#ifdef ISA_HAS_CC_REGS
+        memset(ccRegs[tid], 0, sizeof(ccRegs[tid]));
+#endif
+        isa[tid]->clear();
 
         // Define dummy instructions and resource requests to be used.
         dummyInst[tid] = new InOrderDynInst(this, 
@@ -350,15 +375,24 @@ InOrderCPU::InOrderCPU(Params *params)
 
         dummyReq[tid] = new ResourceRequest(resPool->getResource(0));
 
-#if FULL_SYSTEM
-    // Use this dummy inst to force squashing behind every instruction
-    // in pipeline
-    dummyTrapInst[tid] = new InOrderDynInst(this, NULL, 0, 0, 0);
-    dummyTrapInst[tid]->seqNum = 0;
-    dummyTrapInst[tid]->squashSeqNum = 0;
-    dummyTrapInst[tid]->setTid(tid);
-#endif
 
+        if (FullSystem) {
+            // Use this dummy inst to force squashing behind every instruction
+            // in pipeline
+            dummyTrapInst[tid] = new InOrderDynInst(this, NULL, 0, 0, 0);
+            dummyTrapInst[tid]->seqNum = 0;
+            dummyTrapInst[tid]->squashSeqNum = 0;
+            dummyTrapInst[tid]->setTid(tid);
+        }
+
+        trapPending[tid] = false;
+
+    }
+
+    // InOrderCPU always requires an interrupt controller.
+    if (!params->switched_out && !interrupts) {
+        fatal("InOrderCPU %s has no interrupt controller.\n"
+              "Ensure createInterruptController() is called.\n", name());
     }
 
     dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0);
@@ -373,13 +407,13 @@ InOrderCPU::InOrderCPU(Params *params)
     frontEndSked = createFrontEndSked();
     faultSked = createFaultSked();
 
-    lastRunningCycle = curTick();
+    lastRunningCycle = curCycle();
 
     lockAddr = 0;
     lockFlag = false;
     
     // Schedule First Tick Event, CPU will reschedule itself from here on out.
-    scheduleTickEvent(0);
+    scheduleTickEvent(Cycles(0));
 }
 
 InOrderCPU::~InOrderCPU()
@@ -640,16 +674,21 @@ InOrderCPU::regStats()
     committedInsts
         .init(numThreads)
         .name(name() + ".committedInsts")
-        .desc("Number of Instructions Simulated (Per-Thread)");
+        .desc("Number of Instructions committed (Per-Thread)");
+
+    committedOps
+        .init(numThreads)
+        .name(name() + ".committedOps")
+        .desc("Number of Ops committed (Per-Thread)");
 
     smtCommittedInsts
         .init(numThreads)
         .name(name() + ".smtCommittedInsts")
-        .desc("Number of SMT Instructions Simulated (Per-Thread)");
+        .desc("Number of SMT Instructions committed (Per-Thread)");
 
     totalCommittedInsts
         .name(name() + ".committedInsts_total")
-        .desc("Number of Instructions Simulated (Total)");
+        .desc("Number of Instructions committed (Total)");
 
     cpi
         .name(name() + ".cpi")
@@ -698,8 +737,9 @@ InOrderCPU::tick()
 
     ++numCycles;
 
+    checkForInterrupts();
+
     bool pipes_idle = true;
-    
     //Tick each of the stages
     for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
         pipelineStage[stNum]->tick();
@@ -707,8 +747,6 @@ InOrderCPU::tick()
         pipes_idle = pipes_idle && pipelineStage[stNum]->idle;
     }
 
-    checkForInterrupts();
-
     if (pipes_idle)
         idleCycles++;
     else
@@ -729,17 +767,17 @@ InOrderCPU::tick()
     if (!tickEvent.scheduled()) {
         if (_status == SwitchedOut) {
             // increment stat
-            lastRunningCycle = curTick();
+            lastRunningCycle = curCycle();
         } else if (!activityRec.active()) {
             DPRINTF(InOrderCPU, "sleeping CPU.\n");
-            lastRunningCycle = curTick();
+            lastRunningCycle = curCycle();
             timesIdled++;
         } else {
             //Tick next_tick = curTick() + cycles(1);
             //tickEvent.schedule(next_tick);
-            schedule(&tickEvent, nextCycle(curTick() + 1));
+            schedule(&tickEvent, clockEdge(Cycles(1)));
             DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", 
-                    nextCycle(curTick() + 1));
+                    clockEdge(Cycles(1)));
         }
     }
 
@@ -751,37 +789,40 @@ InOrderCPU::tick()
 void
 InOrderCPU::init()
 {
-    if (!deferRegistration) {
-        registerThreadContexts();
-    }
+    BaseCPU::init();
 
-    // Set inSyscall so that the CPU doesn't squash when initially
-    // setting up registers.
-    for (ThreadID tid = 0; tid < numThreads; ++tid)
-        thread[tid]->inSyscall = true;
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        // Set noSquashFromTC so that the CPU doesn't squash when initially
+        // setting up registers.
+        thread[tid]->noSquashFromTC = true;
+        // Initialise the ThreadContext's memory proxies
+        thread[tid]->initMemProxies(thread[tid]->getTC());
+    }
 
-#if FULL_SYSTEM
-    for (ThreadID tid = 0; tid < numThreads; tid++) {
-        ThreadContext *src_tc = threadContexts[tid];
-        TheISA::initCPU(src_tc, src_tc->contextId());
+    if (FullSystem && !params()->switched_out) {
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            ThreadContext *src_tc = threadContexts[tid];
+            TheISA::initCPU(src_tc, src_tc->contextId());
+        }
     }
-#endif
 
-    // Clear inSyscall.
+    // Clear noSquashFromTC.
     for (ThreadID tid = 0; tid < numThreads; ++tid)
-        thread[tid]->inSyscall = false;
+        thread[tid]->noSquashFromTC = false;
 
     // Call Initializiation Routine for Resource Pool
     resPool->init();
 }
 
-Port*
-InOrderCPU::getPort(const std::string &if_name, int idx)
+void
+InOrderCPU::verifyMemoryMode() const
 {
-    return resPool->getPort(if_name, idx);
+    if (!system->isTimingMode()) {
+        fatal("The in-order CPU requires the memory system to be in "
+              "'timing' mode.\n");
+    }
 }
 
-#if FULL_SYSTEM
 Fault
 InOrderCPU::hwrei(ThreadID tid)
 {
@@ -851,7 +892,7 @@ InOrderCPU::checkForInterrupts()
                 // Schedule Squash Through-out Resource Pool
                 resPool->scheduleEvent(
                     (InOrderCPU::CPUEventType)ResourcePool::SquashAll,
-                    dummyTrapInst[tid], 0);
+                    dummyTrapInst[tid], Cycles(0));
 
                 // Finally, Setup Trap to happen at end of cycle
                 trapContext(interrupt, tid, dummyTrapInst[tid]);
@@ -867,7 +908,6 @@ InOrderCPU::getInterrupts()
     return interrupts->getInterrupt(threadContexts[0]);
 }
 
-
 void
 InOrderCPU::processInterrupts(Fault interrupt)
 {
@@ -886,22 +926,12 @@ InOrderCPU::processInterrupts(Fault interrupt)
     trap(interrupt, threadContexts[0]->contextId(), dummyBufferInst);
 }
 
-
-void
-InOrderCPU::updateMemPorts()
-{
-    // Update all ThreadContext's memory ports (Functional/Virtual
-    // Ports)
-    ThreadID size = thread.size();
-    for (ThreadID i = 0; i < size; ++i)
-        thread[i]->connectMemPorts(thread[i]->getTC());
-}
-#endif
-
 void
-InOrderCPU::trapContext(Fault fault, ThreadID tid, DynInstPtr inst, int delay)
+InOrderCPU::trapContext(Fault fault, ThreadID tid, DynInstPtr inst,
+                        Cycles delay)
 {
     scheduleCpuEvent(Trap, fault, tid, inst, delay);
+    trapPending[tid] = true;
 }
 
 void
@@ -912,7 +942,8 @@ InOrderCPU::trap(Fault fault, ThreadID tid, DynInstPtr inst)
 }
 
 void 
-InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay)
+InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid,
+                               Cycles delay)
 {
     scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay);
 }
@@ -940,25 +971,21 @@ InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num,
 void
 InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
                              ThreadID tid, DynInstPtr inst, 
-                             unsigned delay, CPUEventPri event_pri)
+                             Cycles delay, CPUEventPri event_pri)
 {
     CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst,
                                        event_pri);
 
-    Tick sked_tick = nextCycle(curTick() + ticks(delay));
-    if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
-                eventNames[c_event], curTick() + delay, tid);
-        schedule(cpu_event, sked_tick);
-    } else {
-        cpu_event->process();
-        cpuEventRemoveList.push(cpu_event);
-    }
+    Tick sked_tick = clockEdge(delay);
+    DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
+            eventNames[c_event], curTick() + delay, tid);
+    schedule(cpu_event, sked_tick);
 
     // Broadcast event to the Resource Pool
     // Need to reset tid just in case this is a dummy instruction
     inst->setTid(tid);        
-    resPool->scheduleEvent(c_event, inst, 0, 0, tid);
+    // @todo: Is this really right? Should the delay not be passed on?
+    resPool->scheduleEvent(c_event, inst, Cycles(0), 0, tid);
 }
 
 bool
@@ -1062,7 +1089,7 @@ InOrderCPU::activateThreadInPipeline(ThreadID tid)
 }
 
 void
-InOrderCPU::deactivateContext(ThreadID tid, int delay)
+InOrderCPU::deactivateContext(ThreadID tid, Cycles delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid);
 
@@ -1118,7 +1145,6 @@ InOrderCPU::updateThreadPriority()
         //DEFAULT TO ROUND ROBIN SCHEME
         //e.g. Move highest priority to end of thread list
         list<ThreadID>::iterator list_begin = activeThreads.begin();
-        list<ThreadID>::iterator list_end   = activeThreads.end();
 
         unsigned high_thread = *list_begin;
 
@@ -1145,7 +1171,7 @@ InOrderCPU::tickThreadStats()
 }
 
 void
-InOrderCPU::activateContext(ThreadID tid, int delay)
+InOrderCPU::activateContext(ThreadID tid, Cycles delay)
 {
     DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid);
 
@@ -1160,7 +1186,7 @@ InOrderCPU::activateContext(ThreadID tid, int delay)
 }
 
 void
-InOrderCPU::activateNextReadyContext(int delay)
+InOrderCPU::activateNextReadyContext(Cycles delay)
 {
     DPRINTF(InOrderCPU,"Activating next ready thread\n");
 
@@ -1175,11 +1201,11 @@ InOrderCPU::activateNextReadyContext(int delay)
 }
 
 void
-InOrderCPU::haltContext(ThreadID tid, int delay)
+InOrderCPU::haltContext(ThreadID tid)
 {
     DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid);
 
-    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid]);
 
     activityRec.activity();
 }
@@ -1200,9 +1226,9 @@ InOrderCPU::haltThread(ThreadID tid)
 }
 
 void
-InOrderCPU::suspendContext(ThreadID tid, int delay)
+InOrderCPU::suspendContext(ThreadID tid)
 {
-    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay);
+    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid]);
 }
 
 void
@@ -1234,18 +1260,24 @@ InOrderCPU::getPipeStage(int stage_num)
 
 
 RegIndex
-InOrderCPU::flattenRegIdx(RegIndex reg_idx, RegType &reg_type, ThreadID tid)
-{
-    if (reg_idx < FP_Base_DepTag) {
-        reg_type = IntType;
-        return isa[tid].flattenIntIndex(reg_idx);
-    } else if (reg_idx < Ctrl_Base_DepTag) {
-        reg_type = FloatType;
-        reg_idx -= FP_Base_DepTag;
-        return isa[tid].flattenFloatIndex(reg_idx);
-    } else {
-        reg_type = MiscType;
-        return reg_idx - TheISA::Ctrl_Base_DepTag;
+InOrderCPU::flattenRegIdx(RegIndex reg_idx, RegClass &reg_type, ThreadID tid)
+{
+    RegIndex rel_idx;
+
+    reg_type = regIdxToClass(reg_idx, &rel_idx);
+
+    switch (reg_type) {
+      case IntRegClass:
+        return isa[tid]->flattenIntIndex(rel_idx);
+
+      case FloatRegClass:
+        return isa[tid]->flattenFloatIndex(rel_idx);
+
+      case MiscRegClass:
+        return rel_idx;
+
+      default:
+        panic("register %d out of range\n", reg_idx);
     }
 }
 
@@ -1276,6 +1308,19 @@ InOrderCPU::readFloatRegBits(RegIndex reg_idx, ThreadID tid)
     return floatRegs.i[tid][reg_idx];
 }
 
+CCReg
+InOrderCPU::readCCReg(RegIndex reg_idx, ThreadID tid)
+{
+#ifdef ISA_HAS_CC_REGS
+    DPRINTF(CCRegs, "[tid:%i]: Reading CC. Reg %i as %x\n",
+            tid, reg_idx, ccRegs[tid][reg_idx]);
+
+    return ccRegs[tid][reg_idx];
+#else
+    panic("readCCReg: ISA does not have CC regs\n");
+#endif
+}
+
 void
 InOrderCPU::setIntReg(RegIndex reg_idx, uint64_t val, ThreadID tid)
 {
@@ -1315,6 +1360,18 @@ InOrderCPU::setFloatRegBits(RegIndex reg_idx, FloatRegBits val, ThreadID tid)
             floatRegs.f[tid][reg_idx]);
 }
 
+void
+InOrderCPU::setCCReg(RegIndex reg_idx, CCReg val, ThreadID tid)
+{
+#ifdef ISA_HAS_CC_REGS
+    DPRINTF(CCRegs, "[tid:%i]: Setting CC. Reg %i to %x\n",
+            tid, reg_idx, val);
+    ccRegs[tid][reg_idx] = val;
+#else
+    panic("readCCReg: ISA does not have CC regs\n");
+#endif
+}
+
 uint64_t
 InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid)
 {
@@ -1323,18 +1380,25 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid)
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {                   
+    RegIndex rel_idx;
+
+    switch (regIdxToClass(reg_idx, &rel_idx)) {
+      case IntRegClass:
         // Integer Register File
-        return readIntReg(reg_idx, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {          
+        return readIntReg(rel_idx, tid);
+
+      case FloatRegClass:
         // Float Register File
-        reg_idx -= FP_Base_DepTag;
-        return readFloatRegBits(reg_idx, tid);
-    } else {
-        reg_idx -= Ctrl_Base_DepTag;
-        return readMiscReg(reg_idx, tid);  // Misc. Register File
+        return readFloatRegBits(rel_idx, tid);
+
+      case MiscRegClass:
+        return readMiscReg(rel_idx, tid);  // Misc. Register File
+
+      default:
+        panic("register %d out of range\n", reg_idx);
     }
 }
+
 void
 InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
                               ThreadID tid)
@@ -1344,44 +1408,49 @@ InOrderCPU::setRegOtherThread(unsigned reg_idx, const MiscReg &val,
         tid = TheISA::getTargetThread(tcBase(tid));
     }
 
-    if (reg_idx < FP_Base_DepTag) {            // Integer Register File
-        setIntReg(reg_idx, val, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {   // Float Register File
-        reg_idx -= FP_Base_DepTag;
-        setFloatRegBits(reg_idx, val, tid);
-    } else {
-        reg_idx -= Ctrl_Base_DepTag;
-        setMiscReg(reg_idx, val, tid); // Misc. Register File
+    RegIndex rel_idx;
+
+    switch (regIdxToClass(reg_idx, &rel_idx)) {
+      case IntRegClass:
+        setIntReg(rel_idx, val, tid);
+        break;
+
+      case FloatRegClass:
+        setFloatRegBits(rel_idx, val, tid);
+        break;
+
+      case CCRegClass:
+        setCCReg(rel_idx, val, tid);
+        break;
+
+      case MiscRegClass:
+        setMiscReg(rel_idx, val, tid); // Misc. Register File
+        break;
     }
 }
 
 MiscReg
 InOrderCPU::readMiscRegNoEffect(int misc_reg, ThreadID tid)
 {
-    return isa[tid].readMiscRegNoEffect(misc_reg);
+    return isa[tid]->readMiscRegNoEffect(misc_reg);
 }
 
 MiscReg
 InOrderCPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    DPRINTF(InOrderCPU, "MiscReg: %i\n", misc_reg);
-    DPRINTF(InOrderCPU, "tid: %i\n", tid);
-    DPRINTF(InOrderCPU, "tcBase: %x\n", tcBase(tid));
-    DPRINTF(InOrderCPU, "isa-tid: %x\n", &isa[tid]);
-
-    return isa[tid].readMiscReg(misc_reg, tcBase(tid));
+    return isa[tid]->readMiscReg(misc_reg, tcBase(tid));
 }
 
 void
 InOrderCPU::setMiscRegNoEffect(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    isa[tid].setMiscRegNoEffect(misc_reg, val);
+    isa[tid]->setMiscRegNoEffect(misc_reg, val);
 }
 
 void
 InOrderCPU::setMiscReg(int misc_reg, const MiscReg &val, ThreadID tid)
 {
-    isa[tid].setMiscReg(misc_reg, val, tcBase(tid));
+    isa[tid]->setMiscReg(misc_reg, val, tcBase(tid));
 }
 
 
@@ -1461,19 +1530,26 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
     
     // Increment thread-state's instruction count
     thread[tid]->numInst++;
+    thread[tid]->numOp++;
 
     // Increment thread-state's instruction stats
     thread[tid]->numInsts++;
+    thread[tid]->numOps++;
 
     // Count committed insts per thread stats
-    committedInsts[tid]++;
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        committedInsts[tid]++;
+
+        // Count total insts committed stat
+        totalCommittedInsts++;
+    }
 
-    // Count total insts committed stat
-    totalCommittedInsts++;
+    committedOps[tid]++;
 
     // Count SMT-committed insts per thread stat
     if (numActiveThreads() > 1) {
-        smtCommittedInsts[tid]++;
+        if (!inst->isMicroop() || inst->isLastMicroop())
+            smtCommittedInsts[tid]++;
     }
 
     // Instruction-Mix Stats
@@ -1494,7 +1570,7 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
     }
 
     // Check for instruction-count-based events.
-    comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
+    comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp);
 
     // Finally, remove instruction from CPU
     removeInst(inst);
@@ -1681,7 +1757,9 @@ InOrderCPU::wakeCPU()
 
     DPRINTF(Activity, "Waking up CPU\n");
 
-    Tick extra_cycles = tickToCycles((curTick() - 1) - lastRunningCycle);
+    Tick extra_cycles = curCycle() - lastRunningCycle;
+    if (extra_cycles != 0)
+        --extra_cycles;
 
     idleCycles += extra_cycles;    
     for (int stage_num = 0; stage_num < NumStages; stage_num++) {
@@ -1690,10 +1768,9 @@ InOrderCPU::wakeCPU()
 
     numCycles += extra_cycles;
 
-    schedule(&tickEvent, nextCycle(curTick()));
+    schedule(&tickEvent, clockEdge());
 }
 
-#if FULL_SYSTEM
 // Lots of copied full system code...place into BaseCPU class?
 void
 InOrderCPU::wakeup()
@@ -1706,13 +1783,22 @@ InOrderCPU::wakeup()
     DPRINTF(Quiesce, "Suspended Processor woken\n");
     threadContexts[0]->activate();
 }
-#endif
 
-#if !FULL_SYSTEM
 void
-InOrderCPU::syscallContext(Fault fault, ThreadID tid, DynInstPtr inst, int delay)
+InOrderCPU::syscallContext(Fault fault, ThreadID tid, DynInstPtr inst,
+                           Cycles delay)
 {
-    //@todo: squash behind syscall
+    // Syscall must be non-speculative, so squash from last stage
+    unsigned squash_stage = NumStages - 1;
+    inst->setSquashInfo(squash_stage);
+
+    // Squash In Pipeline Stage
+    pipelineStage[squash_stage]->setupSquash(inst, tid);
+
+    // Schedule Squash Through-out Resource Pool
+    resPool->scheduleEvent(
+        (InOrderCPU::CPUEventType)ResourcePool::SquashAll, inst,
+        Cycles(0));
     scheduleCpuEvent(Syscall, fault, tid, inst, delay, Syscall_Pri);
 }
 
@@ -1737,13 +1823,11 @@ InOrderCPU::syscall(int64_t callnum, ThreadID tid)
     // Clear Non-Speculative Block Variable
     nonSpecInstActive[tid] = false;
 }
-#endif
 
 TheISA::TLB*
 InOrderCPU::getITBPtr()
 {
-    CacheUnit *itb_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(fetchPortIdx));
+    CacheUnit *itb_res = resPool->getInstUnit();
     return itb_res->tlb();
 }
 
@@ -1751,30 +1835,26 @@ InOrderCPU::getITBPtr()
 TheISA::TLB*
 InOrderCPU::getDTBPtr()
 {
-    CacheUnit *dtb_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-    return dtb_res->tlb();
+    return resPool->getDataUnit()->tlb();
+}
+
+TheISA::Decoder *
+InOrderCPU::getDecoderPtr(unsigned tid)
+{
+    return resPool->getInstUnit()->decoder[tid];
 }
 
 Fault
 InOrderCPU::read(DynInstPtr inst, Addr addr,
                  uint8_t *data, unsigned size, unsigned flags)
 {
-    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
-    //       you want to run w/out caches?
-    CacheUnit *cache_res = 
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-
-    return cache_res->read(inst, addr, data, size, flags);
+    return resPool->getDataUnit()->read(inst, addr, data, size, flags);
 }
 
 Fault
 InOrderCPU::write(DynInstPtr inst, uint8_t *data, unsigned size,
                   Addr addr, unsigned flags, uint64_t *write_res)
 {
-    //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
-    //       you want to run w/out caches?
-    CacheUnit *cache_res =
-        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
-    return cache_res->write(inst, data, size, addr, flags, write_res);
+    return resPool->getDataUnit()->write(inst, data, size, addr, flags,
+                                         write_res);
 }