merge

author Brad Beckmann <Brad.Beckmann@amd.com>

Mon, 1 Feb 2010 06:28:13 +0000 (22:28 -0800)

committer Brad Beckmann <Brad.Beckmann@amd.com>

Mon, 1 Feb 2010 06:28:13 +0000 (22:28 -0800)
author Brad Beckmann <Brad.Beckmann@amd.com>
Mon, 1 Feb 2010 06:28:13 +0000 (22:28 -0800)
committer Brad Beckmann <Brad.Beckmann@amd.com>
Mon, 1 Feb 2010 06:28:13 +0000 (22:28 -0800)
diff --git a/src/cpu/inorder/InOrderCPU.py b/src/cpu/inorder/InOrderCPU.py

index a0b0466a70922168bf201acf39e6baee4b897889..d6db346d4bbd8467ad0c54579d93f8aaad55848c 100644 (file)
--- a/src/cpu/inorder/InOrderCPU.py
+++ b/src/cpu/inorder/InOrderCPU.py
@@ -30,10 +30,15 @@ from m5.params import *
  from m5.proxy import *
  from BaseCPU import BaseCPU
  
+class ThreadModel(Enum):
+    vals = ['Single', 'SMT', 'SwitchOnCacheMiss']
+
  class InOrderCPU(BaseCPU):
      type = 'InOrderCPU'
      activity = Param.Unsigned(0, "Initial count")
  
+    threadModel = Param.ThreadModel('SMT', "Multithreading model (SE-MODE only)")
+    
      cachePorts = Param.Unsigned(2, "Cache Ports")
      stageWidth = Param.Unsigned(1, "Stage width")
  
diff --git a/src/cpu/inorder/SConscript b/src/cpu/inorder/SConscript

index 82a1028c25fa45196c998364d923a1c4b68e30cb..f222350af9754b376e5862431ad5d71a39524fda 100644 (file)
--- a/src/cpu/inorder/SConscript
+++ b/src/cpu/inorder/SConscript
@@ -52,12 +52,16 @@ if 'InOrderCPU' in env['CPU_MODELS']:
         TraceFlag('InOrderUseDef')
         TraceFlag('InOrderMDU')
         TraceFlag('InOrderGraduation')
+       TraceFlag('ThreadModel')
         TraceFlag('RefCount')
+       TraceFlag('AddrDep')    
+       
  
         CompoundFlag('InOrderCPUAll', [ 'InOrderStage', 'InOrderStall', 'InOrderCPU',
                'InOrderMDU', 'InOrderAGEN', 'InOrderFetchSeq', 'InOrderTLB', 'InOrderBPred',
                'InOrderDecode', 'InOrderExecute', 'InOrderInstBuffer', 'InOrderUseDef',
-              'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource'])
+              'InOrderGraduation', 'InOrderCachePort', 'RegDepMap', 'Resource',
+              'ThreadModel'])
  
         Source('pipeline_traits.cc')        
         Source('inorder_dyn_inst.cc')
diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc

index 1e3fdc40e0a65f9a79b5d430b7b263fd3afb93d4..7342f9bc5b9115c69a81e669d187d782400433c5 100644 (file)
--- a/src/cpu/inorder/cpu.cc
+++ b/src/cpu/inorder/cpu.cc
@@ -84,25 +84,25 @@ InOrderCPU::TickEvent::description()
  }
  
  InOrderCPU::CPUEvent::CPUEvent(InOrderCPU *_cpu, CPUEventType e_type,
-                             Fault fault, ThreadID _tid, unsigned _vpe)
-    : Event(CPU_Tick_Pri), cpu(_cpu)
+                               Fault fault, ThreadID _tid, DynInstPtr inst,
+                               unsigned event_pri_offset)
+    : Event(Event::Priority((unsigned int)CPU_Tick_Pri + event_pri_offset)),
+      cpu(_cpu)
  {
-    setEvent(e_type, fault, _tid, _vpe);
+    setEvent(e_type, fault, _tid, inst);
  }
  
  
  std::string InOrderCPU::eventNames[NumCPUEvents] =
  {
      "ActivateThread",
-    "DeallocateThread",
+    "ActivateNextReadyThread",
+    "DeactivateThread",
+    "HaltThread",
      "SuspendThread",
-    "DisableThreads",
-    "EnableThreads",
-    "DisableVPEs",
-    "EnableVPEs",
      "Trap",
      "InstGraduated",
-    "SquashAll",
+    "SquashFromMemStall",
      "UpdatePCs"
  };
  
@@ -115,28 +115,24 @@ InOrderCPU::CPUEvent::process()
          cpu->activateThread(tid);
          break;
  
-      //@TODO: Consider Implementing "Suspend Thread" as Separate from Deallocate
-      case SuspendThread: // Suspend & Deallocate are same for now.
-        //cpu->suspendThread(tid);
-        //break;
-      case DeallocateThread:
-        cpu->deallocateThread(tid);
+      case ActivateNextReadyThread:
+        cpu->activateNextReadyThread();
          break;
  
-      case EnableVPEs:
-        cpu->enableVPEs(vpe);
+      case DeactivateThread:
+        cpu->deactivateThread(tid);
          break;
  
-      case DisableVPEs:
-        cpu->disableVPEs(tid, vpe);
+      case HaltThread:
+        cpu->haltThread(tid);
          break;
  
-      case EnableThreads:
-        cpu->enableThreads(vpe);
+      case SuspendThread: 
+        cpu->suspendThread(tid);
          break;
  
-      case DisableThreads:
-        cpu->disableThreads(tid, vpe);
+      case SquashFromMemStall:
+        cpu->squashDueToMemStall(inst->squashingStage, inst->seqNum, tid);
          break;
  
        case Trap:
@@ -144,12 +140,14 @@ InOrderCPU::CPUEvent::process()
          break;
  
        default:
-        fatal("Unrecognized Event Type %d", cpuEventType);
+        fatal("Unrecognized Event Type %s", eventNames[cpuEventType]);    
      }
-
+    
      cpu->cpuEventRemoveList.push(this);
  }
  
+    
+
  const char *
  InOrderCPU::CPUEvent::description()
  {
@@ -185,11 +183,15 @@ InOrderCPU::InOrderCPU(Params *params)
        system(params->system),
        physmem(system->physmem),
  #endif // FULL_SYSTEM
+#ifdef DEBUG
+      cpuEventNum(0),
+      resReqCount(0),
+#endif // DEBUG
        switchCount(0),
        deferRegistration(false/*params->deferRegistration*/),
        stageTracing(params->stageTracing),
-      numVirtProcs(1)
-{
+      instsPerSwitch(0)
+{    
      ThreadID active_threads;
      cpu_params = params;
  
@@ -208,6 +210,24 @@ InOrderCPU::InOrderCPU(Params *params)
                "in your InOrder implementation or "
                "edit your workload size.");
      }
+
+    
+    if (active_threads > 1) {
+        threadModel = (InOrderCPU::ThreadModel) params->threadModel;
+
+        if (threadModel == SMT) {
+            DPRINTF(InOrderCPU, "Setting Thread Model to SMT.\n");            
+        } else if (threadModel == SwitchOnCacheMiss) {
+            DPRINTF(InOrderCPU, "Setting Thread Model to "
+                    "Switch On Cache Miss\n");
+        }
+        
+    } else {
+        threadModel = Single;
+    }
+     
+        
+    
  #endif
  
      // Bind the fetch & data ports from the resource pool.
@@ -238,6 +258,9 @@ InOrderCPU::InOrderCPU(Params *params)
              Process* dummy_proc = params->workload[0];
              thread[tid] = new Thread(this, tid, dummy_proc);
          }
+        
+        // Eventually set this with parameters...
+        asid[tid] = tid;
  #endif
  
          // Setup the TC that will serve as the interface to the threads/CPU.
@@ -293,15 +316,30 @@ InOrderCPU::InOrderCPU(Params *params)
          memset(floatRegs.i[tid], 0, sizeof(floatRegs.i[tid]));
          isa[tid].clear();
  
-        isa[tid].expandForMultithreading(numThreads, numVirtProcs);
+        isa[tid].expandForMultithreading(numThreads, 1/*numVirtProcs*/);
+
+        // Define dummy instructions and resource requests to be used.
+        dummyInst[tid] = new InOrderDynInst(this, 
+                                            thread[tid], 
+                                            0, 
+                                            tid, 
+                                            asid[tid]);
+
+        dummyReq[tid] = new ResourceRequest(resPool->getResource(0), 
+                                            dummyInst[tid], 
+                                            0, 
+                                            0, 
+                                            0, 
+                                            0);        
      }
  
-    lastRunningCycle = curTick;
-    contextSwitch = false;
+    dummyReqInst = new InOrderDynInst(this, NULL, 0, 0, 0);
+    dummyReqInst->setSquashed();
  
-    // Define dummy instructions and resource requests to be used.
-    DynInstPtr dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0);
-    dummyReq = new ResourceRequest(NULL, NULL, 0, 0, 0, 0);
+    dummyBufferInst = new InOrderDynInst(this, NULL, 0, 0, 0);
+    dummyBufferInst->setSquashed();
+    
+    lastRunningCycle = curTick;
  
      // Reset CPU to reset state.
  #if FULL_SYSTEM
@@ -311,10 +349,17 @@ InOrderCPU::InOrderCPU(Params *params)
      reset();
  #endif
  
+    dummyBufferInst->resetInstCount();
+    
      // Schedule First Tick Event, CPU will reschedule itself from here on out.
      scheduleTickEvent(0);
  }
  
+InOrderCPU::~InOrderCPU()
+{
+    delete resPool;
+}
+
  
  void
  InOrderCPU::regStats()
@@ -322,7 +367,28 @@ InOrderCPU::regStats()
      /* Register the Resource Pool's stats here.*/
      resPool->regStats();
  
+#ifdef DEBUG
+    maxResReqCount
+        .name(name() + ".maxResReqCount")
+        .desc("Maximum number of live resource requests in CPU")
+        .prereq(maxResReqCount);   
+#endif
+
+    /* Register for each Pipeline Stage */
+    for (int stage_num=0; stage_num < ThePipeline::NumStages; stage_num++) {
+        pipelineStage[stage_num]->regStats();
+    }
+
      /* Register any of the InOrderCPU's stats here.*/
+    instsPerCtxtSwitch
+        .name(name() + ".instsPerContextSwitch")
+        .desc("Instructions Committed Per Context Switch")
+        .prereq(instsPerCtxtSwitch);
+    
+    numCtxtSwitches
+        .name(name() + ".contextSwitches")
+        .desc("Number of context switches");
+            
      timesIdled
          .name(name() + ".timesIdled")
          .desc("Number of times that the entire CPU went into an idle state and"
@@ -331,9 +397,17 @@ InOrderCPU::regStats()
  
      idleCycles
          .name(name() + ".idleCycles")
-        .desc("Total number of cycles that the CPU has spent unscheduled due "
-              "to idling")
-        .prereq(idleCycles);
+        .desc("Number of cycles cpu's stages were not processed");
+
+    runCycles
+        .name(name() + ".runCycles")
+        .desc("Number of cycles cpu stages are processed.");
+
+    activity
+        .name(name() + ".activity")
+        .desc("Percentage of cycles cpu is active")
+        .precision(6);
+    activity = (runCycles / numCycles) * 100;
  
      threadCycles
          .init(numThreads)
@@ -342,7 +416,7 @@ InOrderCPU::regStats()
  
      smtCycles
          .name(name() + ".smtCycles")
-        .desc("Total number of cycles that the CPU was simultaneous multithreading.(SMT)");
+        .desc("Total number of cycles that the CPU was in SMT-mode");
  
      committedInsts
          .init(numThreads)
@@ -405,18 +479,27 @@ InOrderCPU::tick()
  
      ++numCycles;
  
+    bool pipes_idle = true;
+    
      //Tick each of the stages
      for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
          pipelineStage[stNum]->tick();
+
+        pipes_idle = pipes_idle && pipelineStage[stNum]->idle;
      }
  
+    if (pipes_idle)
+        idleCycles++;
+    else
+        runCycles++;
+    
      // Now advance the time buffers one tick
      timeBuffer.advance();
      for (int sqNum=0; sqNum < NumStages - 1; sqNum++) {
          stageQueue[sqNum]->advance();
      }
      activityRec.advance();
-
+   
      // Any squashed requests, events, or insts then remove them now
      cleanUpRemovedReqs();
      cleanUpRemovedEvents();
@@ -435,7 +518,8 @@ InOrderCPU::tick()
              //Tick next_tick = curTick + cycles(1);
              //tickEvent.schedule(next_tick);
              mainEventQueue.schedule(&tickEvent, nextCycle(curTick + 1));
-            DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", nextCycle(curTick + 1));
+            DPRINTF(InOrderCPU, "Scheduled CPU for next tick @ %i.\n", 
+                    nextCycle(curTick + 1));
          }
      }
  
@@ -476,7 +560,7 @@ InOrderCPU::reset()
  {
      for (int i = 0; i < numThreads; i++) {
          isa[i].reset(coreType, numThreads,
-                numVirtProcs, dynamic_cast<BaseCPU*>(this));
+                     1/*numVirtProcs*/, dynamic_cast<BaseCPU*>(this));
      }
  }
  
@@ -545,7 +629,7 @@ void
  InOrderCPU::trap(Fault fault, ThreadID tid, int delay)
  {
      //@ Squash Pipeline during TRAP
-    scheduleCpuEvent(Trap, fault, tid, 0/*vpe*/, delay);
+    scheduleCpuEvent(Trap, fault, tid, dummyInst[tid], delay);
  }
  
  void
@@ -554,15 +638,42 @@ InOrderCPU::trapCPU(Fault fault, ThreadID tid)
      fault->invoke(tcBase(tid));
  }
  
+void 
+InOrderCPU::squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay)
+{
+    scheduleCpuEvent(SquashFromMemStall, NoFault, tid, inst, delay);
+}
+
+
+void
+InOrderCPU::squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid)
+{
+    DPRINTF(InOrderCPU, "Squashing Pipeline Stages Due to Memory Stall...\n");
+        
+    // Squash all instructions in each stage including 
+    // instruction that caused the squash (seq_num - 1)
+    // NOTE: The stage bandwidth needs to be cleared, so that's why
+    //       the stalling instruction is squashed as well. The stalled
+    //       instruction is previously placed in another intermediate buffer
+    //       while its stall is being handled.
+    InstSeqNum squash_seq_num = seq_num - 1;
+    
+    for (int stNum=stage_num; stNum >= 0 ; stNum--) {
+        pipelineStage[stNum]->squashDueToMemStall(squash_seq_num, tid);
+    }
+}
+
  void
  InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
-                           ThreadID tid, unsigned vpe, unsigned delay)
+                             ThreadID tid, DynInstPtr inst, 
+                             unsigned delay, unsigned event_pri_offset)
  {
-    CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, vpe);
+    CPUEvent *cpu_event = new CPUEvent(this, c_event, fault, tid, inst,
+                                       event_pri_offset);
  
      if (delay >= 0) {
-        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i.\n",
-                eventNames[c_event], curTick + delay);
+        DPRINTF(InOrderCPU, "Scheduling CPU Event (%s) for cycle %i, [tid:%i].\n",
+                eventNames[c_event], curTick + delay, tid);
          mainEventQueue.schedule(cpu_event,curTick + delay);
      } else {
          cpu_event->process();
@@ -570,12 +681,12 @@ InOrderCPU::scheduleCpuEvent(CPUEventType c_event, Fault fault,
      }
  
      // Broadcast event to the Resource Pool
-    DynInstPtr dummy_inst =
-        new InOrderDynInst(this, NULL, getNextEventNum(), tid);
-    resPool->scheduleEvent(c_event, dummy_inst, 0, 0, tid);
+    // Need to reset tid just in case this is a dummy instruction
+    inst->setTid(tid);        
+    resPool->scheduleEvent(c_event, inst, 0, 0, tid);
  }
  
-inline bool
+bool
  InOrderCPU::isThreadActive(ThreadID tid)
  {
    list<ThreadID>::iterator isActive =
@@ -584,206 +695,144 @@ InOrderCPU::isThreadActive(ThreadID tid)
      return (isActive != activeThreads.end());
  }
  
-
-void
-InOrderCPU::activateThread(ThreadID tid)
+bool
+InOrderCPU::isThreadReady(ThreadID tid)
  {
-    if (!isThreadActive(tid)) {
-        DPRINTF(InOrderCPU,
-                "Adding Thread %i to active threads list in CPU.\n", tid);
-        activeThreads.push_back(tid);
+  list<ThreadID>::iterator isReady =
+      std::find(readyThreads.begin(), readyThreads.end(), tid);
  
-        wakeCPU();
-    }
+    return (isReady != readyThreads.end());
  }
  
-void
-InOrderCPU::deactivateThread(ThreadID tid)
-{
-    DPRINTF(InOrderCPU, "[tid:%i]: Calling deactivate thread.\n", tid);
-
-    if (isThreadActive(tid)) {
-        DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n",
-                tid);
-        list<ThreadID>::iterator thread_it =
-            std::find(activeThreads.begin(), activeThreads.end(), tid);
-
-        removePipelineStalls(*thread_it);
-
-        //@TODO: change stage status' to Idle?
-
-        activeThreads.erase(thread_it);
-    }
-}
-
-void
-InOrderCPU::removePipelineStalls(ThreadID tid)
-{
-    DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n",
-            tid);
-
-    for (int stNum = 0; stNum < NumStages ; stNum++) {
-        pipelineStage[stNum]->removeStalls(tid);
-    }
-
-}
  bool
-InOrderCPU::isThreadInCPU(ThreadID tid)
+InOrderCPU::isThreadSuspended(ThreadID tid)
  {
-  list<ThreadID>::iterator isCurrent =
-      std::find(currentThreads.begin(), currentThreads.end(), tid);
+  list<ThreadID>::iterator isSuspended =
+      std::find(suspendedThreads.begin(), suspendedThreads.end(), tid);
  
-    return (isCurrent != currentThreads.end());
+    return (isSuspended != suspendedThreads.end());
  }
  
  void
-InOrderCPU::addToCurrentThreads(ThreadID tid)
-{
-    if (!isThreadInCPU(tid)) {
-        DPRINTF(InOrderCPU, "Adding Thread %i to current threads list in CPU.\n",
-                tid);
-        currentThreads.push_back(tid);
-    }
+InOrderCPU::activateNextReadyThread()
+{
+    if (readyThreads.size() >= 1) {          
+        ThreadID ready_tid = readyThreads.front();
+        
+        // Activate in Pipeline
+        activateThread(ready_tid);                        
+        
+        // Activate in Resource Pool
+        resPool->activateAll(ready_tid);
+        
+        list<ThreadID>::iterator ready_it =
+            std::find(readyThreads.begin(), readyThreads.end(), ready_tid);
+        readyThreads.erase(ready_it);                        
+    } else {
+        DPRINTF(InOrderCPU,
+                "Attempting to activate new thread, but No Ready Threads to"
+                "activate.\n");
+        DPRINTF(InOrderCPU,
+                "Unable to switch to next active thread.\n");
+    }        
  }
  
  void
-InOrderCPU::removeFromCurrentThreads(ThreadID tid)
+InOrderCPU::activateThread(ThreadID tid)
  {
-    if (isThreadInCPU(tid)) {
+    if (isThreadSuspended(tid)) {
          DPRINTF(InOrderCPU,
-                "Adding Thread %i to current threads list in CPU.\n", tid);
-        list<ThreadID>::iterator isCurrent =
-            std::find(currentThreads.begin(), currentThreads.end(), tid);
-        currentThreads.erase(isCurrent);
-    }
-}
-
-bool
-InOrderCPU::isThreadSuspended(ThreadID tid)
-{
-  list<ThreadID>::iterator isSuspended =
-      std::find(suspendedThreads.begin(), suspendedThreads.end(), tid);
+                "Removing [tid:%i] from suspended threads list.\n", tid);
  
-    return (isSuspended!= suspendedThreads.end());
-}
+        list<ThreadID>::iterator susp_it =
+            std::find(suspendedThreads.begin(), suspendedThreads.end(), 
+                      tid);
+        suspendedThreads.erase(susp_it);                        
+    }
  
-void
-InOrderCPU::enableVirtProcElement(unsigned vpe)
-{
-    DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling  "
-            "Enabling of concurrent virtual processor execution",
-            vpe);
+    if (threadModel == SwitchOnCacheMiss &&
+        numActiveThreads() == 1) {
+        DPRINTF(InOrderCPU,
+                "Ignoring activation of [tid:%i], since [tid:%i] is "
+                "already running.\n", tid, activeThreadId());
+        
+        DPRINTF(InOrderCPU,"Placing [tid:%i] on ready threads list\n", 
+                tid);        
+
+        readyThreads.push_back(tid);
+        
+    } else if (!isThreadActive(tid)) {                
+        DPRINTF(InOrderCPU,
+                "Adding [tid:%i] to active threads list.\n", tid);
+        activeThreads.push_back(tid);
+        
+        activateThreadInPipeline(tid);
  
-    scheduleCpuEvent(EnableVPEs, NoFault, 0/*tid*/, vpe);
-}
+        thread[tid]->lastActivate = curTick;            
  
-void
-InOrderCPU::enableVPEs(unsigned vpe)
-{
-    DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Concurrent Execution "
-            "virtual processors %i", vpe);
+        tcBase(tid)->setStatus(ThreadContext::Active);    
  
-    list<ThreadID>::iterator thread_it = currentThreads.begin();
+        wakeCPU();
  
-    while (thread_it != currentThreads.end()) {
-        if (!isThreadSuspended(*thread_it)) {
-            activateThread(*thread_it);
-        }
-        thread_it++;
+        numCtxtSwitches++;        
      }
  }
  
  void
-InOrderCPU::disableVirtProcElement(ThreadID tid, unsigned vpe)
+InOrderCPU::activateThreadInPipeline(ThreadID tid)
  {
-    DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling  "
-            "Disabling of concurrent virtual processor execution",
-            vpe);
-
-    scheduleCpuEvent(DisableVPEs, NoFault, 0/*tid*/, vpe);
+    for (int stNum=0; stNum < NumStages; stNum++) {
+        pipelineStage[stNum]->activateThread(tid);
+    }    
  }
  
  void
-InOrderCPU::disableVPEs(ThreadID tid, unsigned vpe)
+InOrderCPU::deactivateContext(ThreadID tid, int delay)
  {
-    DPRINTF(InOrderCPU, "[vpe:%i]: Disabling Concurrent Execution of "
-            "virtual processors %i", vpe);
-
-    unsigned base_vpe = TheISA::getVirtProcNum(tcBase(tid));
+    DPRINTF(InOrderCPU,"[tid:%i]: Deactivating ...\n", tid);
  
-    list<ThreadID>::iterator thread_it = activeThreads.begin();
-
-    vector<list<ThreadID>::iterator> removeList;
+    scheduleCpuEvent(DeactivateThread, NoFault, tid, dummyInst[tid], delay);
  
-    while (thread_it != activeThreads.end()) {
-        if (base_vpe != vpe) {
-            removeList.push_back(thread_it);
-        }
-        thread_it++;
-    }
+    // Be sure to signal that there's some activity so the CPU doesn't
+    // deschedule itself.
+    activityRec.activity();
  
-    for (int i = 0; i < removeList.size(); i++) {
-        activeThreads.erase(removeList[i]);
-    }
+    _status = Running;
  }
  
  void
-InOrderCPU::enableMultiThreading(unsigned vpe)
+InOrderCPU::deactivateThread(ThreadID tid)
  {
-    // Schedule event to take place at end of cycle
-    DPRINTF(InOrderCPU, "[vpe:%i]: Scheduling Enable Multithreading on "
-            "virtual processor %i", vpe);
+    DPRINTF(InOrderCPU, "[tid:%i]: Calling deactivate thread.\n", tid);
  
-    scheduleCpuEvent(EnableThreads, NoFault, 0/*tid*/, vpe);
-}
+    if (isThreadActive(tid)) {
+        DPRINTF(InOrderCPU,"[tid:%i]: Removing from active threads list\n",
+                tid);
+        list<ThreadID>::iterator thread_it =
+            std::find(activeThreads.begin(), activeThreads.end(), tid);
  
-void
-InOrderCPU::enableThreads(unsigned vpe)
-{
-    DPRINTF(InOrderCPU, "[vpe:%i]: Enabling Multithreading on "
-            "virtual processor %i", vpe);
+        removePipelineStalls(*thread_it);
  
-    list<ThreadID>::iterator thread_it = currentThreads.begin();
+        activeThreads.erase(thread_it);
  
-    while (thread_it != currentThreads.end()) {
-        if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) {
-            if (!isThreadSuspended(*thread_it)) {
-                activateThread(*thread_it);
-            }
-        }
-        thread_it++;
+        // Ideally, this should be triggered from the
+        // suspendContext/Thread functions
+        tcBase(tid)->setStatus(ThreadContext::Suspended);    
      }
-}
-void
-InOrderCPU::disableMultiThreading(ThreadID tid, unsigned vpe)
-{
-    // Schedule event to take place at end of cycle
-   DPRINTF(InOrderCPU, "[tid:%i]: Scheduling Disable Multithreading on "
-            "virtual processor %i", tid, vpe);
  
-    scheduleCpuEvent(DisableThreads, NoFault, tid, vpe);
+    assert(!isThreadActive(tid));    
  }
  
  void
-InOrderCPU::disableThreads(ThreadID tid, unsigned vpe)
+InOrderCPU::removePipelineStalls(ThreadID tid)
  {
-    DPRINTF(InOrderCPU, "[tid:%i]: Disabling Multithreading on "
-            "virtual processor %i", tid, vpe);
-
-    list<ThreadID>::iterator thread_it = activeThreads.begin();
-
-    vector<list<ThreadID>::iterator> removeList;
+    DPRINTF(InOrderCPU,"[tid:%i]: Removing all pipeline stalls\n",
+            tid);
  
-    while (thread_it != activeThreads.end()) {
-        if (TheISA::getVirtProcNum(tcBase(*thread_it)) == vpe) {
-            removeList.push_back(thread_it);
-        }
-        thread_it++;
+    for (int stNum = 0; stNum < NumStages ; stNum++) {
+        pipelineStage[stNum]->removeStalls(tid);
      }
  
-    for (int i = 0; i < removeList.size(); i++) {
-        activeThreads.erase(removeList[i]);
-    }
  }
  
  void
@@ -825,7 +874,8 @@ InOrderCPU::activateContext(ThreadID tid, int delay)
  {
      DPRINTF(InOrderCPU,"[tid:%i]: Activating ...\n", tid);
  
-    scheduleCpuEvent(ActivateThread, NoFault, tid, 0/*vpe*/, delay);
+    
+    scheduleCpuEvent(ActivateThread, NoFault, tid, dummyInst[tid], delay);
  
      // Be sure to signal that there's some activity so the CPU doesn't
      // deschedule itself.
@@ -834,71 +884,73 @@ InOrderCPU::activateContext(ThreadID tid, int delay)
      _status = Running;
  }
  
-
  void
-InOrderCPU::suspendContext(ThreadID tid, int delay)
+InOrderCPU::activateNextReadyContext(int delay)
  {
-    scheduleCpuEvent(SuspendThread, NoFault, tid, 0/*vpe*/, delay);
-    //_status = Idle;
-}
+    DPRINTF(InOrderCPU,"Activating next ready thread\n");
  
-void
-InOrderCPU::suspendThread(ThreadID tid)
-{
-    DPRINTF(InOrderCPU,"[tid: %i]: Suspended ...\n", tid);
-    deactivateThread(tid);
-}
+    // NOTE: Add 5 to the event priority so that we always activate
+    // threads after we've finished deactivating, squashing, etc.
+    // other threads
+    scheduleCpuEvent(ActivateNextReadyThread, NoFault, 0/*tid*/, dummyInst[0], 
+                     delay, 5);
  
-void
-InOrderCPU::deallocateContext(ThreadID tid, int delay)
-{
-    scheduleCpuEvent(DeallocateThread, NoFault, tid, 0/*vpe*/, delay);
+    // Be sure to signal that there's some activity so the CPU doesn't
+    // deschedule itself.
+    activityRec.activity();
+
+    _status = Running;
  }
  
  void
-InOrderCPU::deallocateThread(ThreadID tid)
+InOrderCPU::haltContext(ThreadID tid, int delay)
  {
-    DPRINTF(InOrderCPU,"[tid:%i]: Deallocating ...", tid);
-
-    removeFromCurrentThreads(tid);
+    DPRINTF(InOrderCPU, "[tid:%i]: Calling Halt Context...\n", tid);
  
-    deactivateThread(tid);
+    scheduleCpuEvent(HaltThread, NoFault, tid, dummyInst[tid], delay);
  
-    squashThreadInPipeline(tid);
+    activityRec.activity();
  }
  
  void
-InOrderCPU::squashThreadInPipeline(ThreadID tid)
+InOrderCPU::haltThread(ThreadID tid)
  {
-    //Squash all instructions in each stage
-    for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
-        pipelineStage[stNum]->squash(0 /*seq_num*/, tid);
+    DPRINTF(InOrderCPU, "[tid:%i]: Placing on Halted Threads List...\n", tid);
+    deactivateThread(tid);
+    squashThreadInPipeline(tid);   
+    haltedThreads.push_back(tid);    
+
+    tcBase(tid)->setStatus(ThreadContext::Halted);    
+
+    if (threadModel == SwitchOnCacheMiss) {        
+        activateNextReadyContext();    
      }
  }
  
  void
-InOrderCPU::haltContext(ThreadID tid, int delay)
+InOrderCPU::suspendContext(ThreadID tid, int delay)
  {
-    DPRINTF(InOrderCPU, "[tid:%i]: Halt context called.\n", tid);
-
-    // Halt is same thing as deallocate for now
-    // @TODO: Differentiate between halt & deallocate in the CPU
-    // model
-    deallocateContext(tid, delay);
+    scheduleCpuEvent(SuspendThread, NoFault, tid, dummyInst[tid], delay);
  }
  
  void
-InOrderCPU::insertThread(ThreadID tid)
+InOrderCPU::suspendThread(ThreadID tid)
  {
-    panic("Unimplemented Function\n.");
+    DPRINTF(InOrderCPU, "[tid:%i]: Placing on Suspended Threads List...\n", tid);
+    deactivateThread(tid);
+    suspendedThreads.push_back(tid);    
+    thread[tid]->lastSuspend = curTick;    
+
+    tcBase(tid)->setStatus(ThreadContext::Suspended);    
  }
  
  void
-InOrderCPU::removeThread(ThreadID tid)
+InOrderCPU::squashThreadInPipeline(ThreadID tid)
  {
-    DPRINTF(InOrderCPU, "Removing Thread %i from CPU.\n", tid);
-
-    /** Broadcast to CPU resources*/
+    //Squash all instructions in each stage
+    for (int stNum=NumStages - 1; stNum >= 0 ; stNum--) {
+        pipelineStage[stNum]->squash(0 /*seq_num*/, tid);
+    }
  }
  
  PipelineStage*
@@ -907,14 +959,6 @@ InOrderCPU::getPipeStage(int stage_num)
      return pipelineStage[stage_num];
  }
  
-
-void
-InOrderCPU::activateWhenReady(ThreadID tid)
-{
-    panic("Unimplemented Function\n.");
-}
-
-
  uint64_t
  InOrderCPU::readPC(ThreadID tid)
  {
@@ -1002,9 +1046,11 @@ InOrderCPU::readRegOtherThread(unsigned reg_idx, ThreadID tid)
          tid = TheISA::getTargetThread(tcBase(tid));
      }
  
-    if (reg_idx < FP_Base_DepTag) {                   // Integer Register File
+    if (reg_idx < FP_Base_DepTag) {                   
+        // Integer Register File
          return readIntReg(reg_idx, tid);
-    } else if (reg_idx < Ctrl_Base_DepTag) {          // Float Register File
+    } else if (reg_idx < Ctrl_Base_DepTag) {          
+        // Float Register File
          reg_idx -= FP_Base_DepTag;
          return readFloatRegBits(reg_idx, tid);
      } else {
@@ -1067,16 +1113,35 @@ InOrderCPU::addInst(DynInstPtr &inst)
      return --(instList[tid].end());
  }
  
+void 
+InOrderCPU::updateContextSwitchStats()
+{
+    // Set Average Stat Here, then reset to 0    
+    instsPerCtxtSwitch = instsPerSwitch;
+    instsPerSwitch = 0;
+}
+
+    
  void
  InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
  {
-    // Set the CPU's PCs - This contributes to the precise state of the CPU which can be used
-    // when restoring a thread to the CPU after a fork or after an exception
-    // @TODO: Set-Up Grad-Info/Committed-Info to let ThreadState know if it's a branch or not
+    // Set the CPU's PCs - This contributes to the precise state of the CPU 
+    // which can be used when restoring a thread to the CPU after any
+    // type of context switching activity (fork, exception, etc.)
      setPC(inst->readPC(), tid);
      setNextPC(inst->readNextPC(), tid);
      setNextNPC(inst->readNextNPC(), tid);
  
+    if (inst->isControl()) {
+        thread[tid]->lastGradIsBranch = true;
+        thread[tid]->lastBranchPC = inst->readPC();
+        thread[tid]->lastBranchNextPC = inst->readNextPC();
+        thread[tid]->lastBranchNextNPC = inst->readNextNPC();        
+    } else {
+        thread[tid]->lastGradIsBranch = false;
+    }
+        
+
      // Finalize Trace Data For Instruction
      if (inst->traceData) {
          //inst->traceData->setCycle(curTick);
@@ -1087,9 +1152,9 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
          inst->traceData = NULL;
      }
  
-    // Set Last Graduated Instruction In Thread State
-    //thread[tid]->lastGradInst = inst;
-
+    // Increment active thread's instruction count
+    instsPerSwitch++;
+    
      // Increment thread-state's instruction count
      thread[tid]->numInst++;
  
@@ -1112,18 +1177,31 @@ InOrderCPU::instDone(DynInstPtr inst, ThreadID tid)
  
      // Broadcast to other resources an instruction
      // has been completed
-    resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, tid);
+    resPool->scheduleEvent((CPUEventType)ResourcePool::InstGraduated, inst, 
+                           0, 0, tid);
  
      // Finally, remove instruction from CPU
      removeInst(inst);
  }
  
+// Currently unused; intended to replace the repeated remove-list code
+// found in removeInst() and squashInstIt().
  void
  InOrderCPU::addToRemoveList(DynInstPtr &inst)
  {
      removeInstsThisCycle = true;
-
-    removeList.push(inst->getInstListIt());
+    if (!inst->isRemoveList()) {            
+        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+                "[sn:%lli] to remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+        inst->setRemoveList();        
+        removeList.push(inst->getInstListIt());
+    }  else {
+        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+                "[sn:%lli], already remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+    }
+    
  }
  
  void
@@ -1136,7 +1214,18 @@ InOrderCPU::removeInst(DynInstPtr &inst)
      removeInstsThisCycle = true;
  
      // Remove the instruction.
-    removeList.push(inst->getInstListIt());
+    if (!inst->isRemoveList()) {            
+        DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+                "[sn:%lli] to remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+        inst->setRemoveList();        
+        removeList.push(inst->getInstListIt());
+    } else {
+        DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+                "[sn:%lli], already on remove list\n",
+                inst->threadNumber, inst->readPC(), inst->seqNum);
+    }
+
  }
  
  void
@@ -1150,7 +1239,7 @@ InOrderCPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
  
      inst_iter--;
  
-    DPRINTF(InOrderCPU, "Deleting instructions from CPU instruction "
+    DPRINTF(InOrderCPU, "Squashing instructions from CPU instruction "
              "list that are from [tid:%i] and above [sn:%lli] (end=%lli).\n",
              tid, seq_num, (*inst_iter)->seqNum);
  
@@ -1180,8 +1269,22 @@ InOrderCPU::squashInstIt(const ListIt &instIt, ThreadID tid)
  
          (*instIt)->setSquashed();
  
-        removeList.push(instIt);
+        if (!(*instIt)->isRemoveList()) {            
+            DPRINTF(InOrderCPU, "Pushing instruction [tid:%i] PC %#x "
+                    "[sn:%lli] to remove list\n",
+                    (*instIt)->threadNumber, (*instIt)->readPC(), 
+                    (*instIt)->seqNum);
+            (*instIt)->setRemoveList();        
+            removeList.push(instIt);
+        } else {
+            DPRINTF(InOrderCPU, "Ignoring instruction removal for [tid:%i] PC %#x "
+                    "[sn:%lli], already on remove list\n",
+                    (*instIt)->threadNumber, (*instIt)->readPC(), 
+                    (*instIt)->seqNum);
+        }
+    
      }
+    
  }
  
  
@@ -1193,7 +1296,7 @@ InOrderCPU::cleanUpRemovedInsts()
                  "[tid:%i] [sn:%lli] PC %#x\n",
                  (*removeList.front())->threadNumber,
                  (*removeList.front())->seqNum,
-                (*removeList.front())->readPC());
+               (*removeList.front())->readPC());
  
          DynInstPtr inst = *removeList.front();
          ThreadID tid = inst->threadNumber;
@@ -1221,11 +1324,6 @@ InOrderCPU::cleanUpRemovedInsts()
          instList[tid].erase(removeList.front());
  
          removeList.pop();
-
-        DPRINTF(RefCount, "pop from remove list: [sn:%i]: Refcount = %i.\n",
-                inst->seqNum,
-                0/*inst->curCount()*/);
-
      }
  
      removeInstsThisCycle = false;
@@ -1237,22 +1335,18 @@ InOrderCPU::cleanUpRemovedReqs()
      while (!reqRemoveList.empty()) {
          ResourceRequest *res_req = reqRemoveList.front();
  
-        DPRINTF(RefCount, "[tid:%i]: Removing Request, "
-                "[sn:%lli] [slot:%i] [stage_num:%i] [res:%s] [refcount:%i].\n",
+        DPRINTF(InOrderCPU, "[tid:%i] [sn:%lli]: Removing Request "
+                "[stage_num:%i] [res:%s] [slot:%i] [completed:%i].\n",
                  res_req->inst->threadNumber,
                  res_req->inst->seqNum,
-                res_req->getSlot(),
                  res_req->getStageNum(),
                  res_req->res->name(),
-                0/*res_req->inst->curCount()*/);
+                (res_req->isCompleted()) ? res_req->getComplSlot() : res_req->getSlot(),
+                res_req->isCompleted());
  
          reqRemoveList.pop();
  
          delete res_req;
-
-        DPRINTF(RefCount, "after remove request: [sn:%i]: Refcount = %i.\n",
-                res_req->inst->seqNum,
-                0/*res_req->inst->curCount()*/);
      }
  }
  
@@ -1297,8 +1391,14 @@ InOrderCPU::wakeCPU()
  
      DPRINTF(Activity, "Waking up CPU\n");
  
-    //@todo: figure out how to count idleCycles correctly
-    //idleCycles += (curTick - 1) - lastRunningCycle;
+    Tick extra_cycles = tickToCycles((curTick - 1) - lastRunningCycle);
+
+    idleCycles += extra_cycles;    
+    for (int stage_num = 0; stage_num < NumStages; stage_num++) {
+        pipelineStage[stage_num]->idleCycles += extra_cycles;
+    }    
+
+    numCycles += extra_cycles;
  
      mainEventQueue.schedule(&tickEvent, curTick);
  }
@@ -1380,7 +1480,8 @@ InOrderCPU::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
  {
      //@TODO: Generalize name "CacheUnit" to "MemUnit" just in case
      //       you want to run w/out caches?
-    CacheUnit *cache_res = dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
+    CacheUnit *cache_res = 
+        dynamic_cast<CacheUnit*>(resPool->getResource(dataPortIdx));
  
      return cache_res->read(inst, addr, data, flags);
  }
@@ -1483,14 +1584,16 @@ InOrderCPU::write(DynInstPtr inst, uint8_t data, Addr addr,
  
  template<>
  Fault
-InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res)
+InOrderCPU::write(DynInstPtr inst, double data, Addr addr, unsigned flags, 
+                  uint64_t *res)
  {
      return write(inst, *(uint64_t*)&data, addr, flags, res);
  }
  
  template<>
  Fault
-InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res)
+InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, 
+                  uint64_t *res)
  {
      return write(inst, *(uint32_t*)&data, addr, flags, res);
  }
@@ -1498,7 +1601,8 @@ InOrderCPU::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64
  
  template<>
  Fault
-InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res)
+InOrderCPU::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, 
+                  uint64_t *res)
  {
      return write(inst, (uint32_t)data, addr, flags, res);
  }
diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh

index 3320532ba4c674908707354a76a73635c04597ea..0c42f349ecd99d10a27cf9084a0bfc5d3ed851ca 100644 (file)
--- a/src/cpu/inorder/cpu.hh
+++ b/src/cpu/inorder/cpu.hh
@@ -89,17 +89,31 @@ class InOrderCPU : public BaseCPU
      typedef TimeBuffer<InterStageStruct> StageQueue;
  
      friend class Resource;
-
+    
    public:
      /** Constructs a CPU with the given parameters. */
      InOrderCPU(Params *params);
-
+    /* Destructor */
+    ~InOrderCPU();
+    
      /** CPU ID */
      int cpu_id;
  
+    // SE Mode ASIDs
+    ThreadID asid[ThePipeline::MaxThreads];
+
      /** Type of core that this is */
      std::string coreType;
  
+    // Only needed for SE MODE
+    enum ThreadModel {
+        Single,
+        SMT,
+        SwitchOnCacheMiss
+    };
+    
+    ThreadModel threadModel;
+
      int readCpuId() { return cpu_id; }
  
      void setCpuId(int val) { cpu_id = val; }
@@ -117,7 +131,6 @@ class InOrderCPU : public BaseCPU
  
      /** Overall CPU status. */
      Status _status;
-
    private:
      /** Define TickEvent for the CPU */
      class TickEvent : public Event
@@ -144,9 +157,11 @@ class InOrderCPU : public BaseCPU
      void scheduleTickEvent(int delay)
      {
          if (tickEvent.squashed())
-          mainEventQueue.reschedule(&tickEvent, nextCycle(curTick + ticks(delay)));
+          mainEventQueue.reschedule(&tickEvent, 
+                                    nextCycle(curTick + ticks(delay)));
          else if (!tickEvent.scheduled())
-          mainEventQueue.schedule(&tickEvent, nextCycle(curTick + ticks(delay)));
+          mainEventQueue.schedule(&tickEvent, 
+                                  nextCycle(curTick + ticks(delay)));
      }
  
      /** Unschedule tick event, regardless of its current state. */
@@ -165,15 +180,13 @@ class InOrderCPU : public BaseCPU
      // pool event.
      enum CPUEventType {
          ActivateThread,
-        DeallocateThread,
+        ActivateNextReadyThread,
+        DeactivateThread,
+        HaltThread,
          SuspendThread,
-        DisableThreads,
-        EnableThreads,
-        DisableVPEs,
-        EnableVPEs,
          Trap,
          InstGraduated,
-        SquashAll,
+        SquashFromMemStall,
          UpdatePCs,
          NumCPUEvents
      };
@@ -189,22 +202,24 @@ class InOrderCPU : public BaseCPU
        public:
          CPUEventType cpuEventType;
          ThreadID tid;
-        unsigned vpe;
+        DynInstPtr inst;
          Fault fault;
-
+        unsigned vpe;
+        
        public:
          /** Constructs a CPU event. */
          CPUEvent(InOrderCPU *_cpu, CPUEventType e_type, Fault fault,
-                 ThreadID _tid, unsigned _vpe);
+                 ThreadID _tid, DynInstPtr inst, unsigned event_pri_offset);
  
          /** Set Type of Event To Be Scheduled */
          void setEvent(CPUEventType e_type, Fault _fault, ThreadID _tid,
-                      unsigned _vpe)
+                      DynInstPtr _inst)
          {
              fault = _fault;
              cpuEventType = e_type;
              tid = _tid;
-            vpe = _vpe;
+            inst = _inst;
+            vpe = 0;            
          }
  
          /** Processes a resource event. */
@@ -222,17 +237,21 @@ class InOrderCPU : public BaseCPU
  
      /** Schedule a CPU Event */
      void scheduleCpuEvent(CPUEventType cpu_event, Fault fault, ThreadID tid,
-                          unsigned vpe, unsigned delay = 0);
+                          DynInstPtr inst, unsigned delay = 0,
+                          unsigned event_pri_offset = 0);
  
    public:
      /** Interface between the CPU and CPU resources. */
      ResourcePool *resPool;
  
-    /** Instruction used to signify that there is no *real* instruction in buffer slot */
+    /** Instruction used to signify that there is no *real* instruction in 
+        buffer slot */
+    DynInstPtr dummyInst[ThePipeline::MaxThreads];
      DynInstPtr dummyBufferInst;
+    DynInstPtr dummyReqInst;
  
      /** Used by resources to signify a denied access to a resource. */
-    ResourceRequest *dummyReq;
+    ResourceRequest *dummyReq[ThePipeline::MaxThreads];
  
      /** Identifies the resource id that identifies a fetch
       * access unit.
@@ -331,26 +350,39 @@ class InOrderCPU : public BaseCPU
      void trap(Fault fault, ThreadID tid, int delay = 0);
      void trapCPU(Fault fault, ThreadID tid);
  
-    /** Setup CPU to insert a thread's context */
-    void insertThread(ThreadID tid);
-
-    /** Remove all of a thread's context from CPU */
-    void removeThread(ThreadID tid);
-
      /** Add Thread to Active Threads List. */
      void activateContext(ThreadID tid, int delay = 0);
      void activateThread(ThreadID tid);
+    void activateThreadInPipeline(ThreadID tid);
+    
+    /** Activate the next thread from the Ready Threads List. */
+    void activateNextReadyContext(int delay = 0);
+    void activateNextReadyThread();
+
+    /** Remove from Active Thread List */
+    void deactivateContext(ThreadID tid, int delay = 0);
+    void deactivateThread(ThreadID tid);
  
-    /** Remove Thread from Active Threads List */
+    /** Suspend Thread, Remove from Active Threads List, Add to Suspend List */
      void suspendContext(ThreadID tid, int delay = 0);
      void suspendThread(ThreadID tid);
  
-    /** Remove Thread from Active Threads List &&
-     *  Remove Thread Context from CPU.
+    /** Halt Thread, Remove from Active Thread List, Place Thread on Halted 
+     *  Threads List 
       */
-    void deallocateContext(ThreadID tid, int delay = 0);
-    void deallocateThread(ThreadID tid);
-    void deactivateThread(ThreadID tid);
+    void haltContext(ThreadID tid, int delay = 0);
+    void haltThread(ThreadID tid);
+
+    /** squashFromMemStall() - sets up a squash event
+     *  squashDueToMemStall() - squashes pipeline
+     *  @note: maybe squashContext/squashThread would be better?
+     */
+    void squashFromMemStall(DynInstPtr inst, ThreadID tid, int delay = 0);
+    void squashDueToMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid);    
+
+    void removePipelineStalls(ThreadID tid);
+    void squashThreadInPipeline(ThreadID tid);
+    void squashBehindMemStall(int stage_num, InstSeqNum seq_num, ThreadID tid);    
  
      PipelineStage* getPipeStage(int stage_num);
  
@@ -361,37 +393,6 @@ class InOrderCPU : public BaseCPU
          return 0;
      }
  
-    /** Remove Thread from Active Threads List &&
-     *  Remove Thread Context from CPU.
-     */
-    void haltContext(ThreadID tid, int delay = 0);
-
-    void removePipelineStalls(ThreadID tid);
-
-    void squashThreadInPipeline(ThreadID tid);
-
-    /// Notify the CPU to enable a virtual processor element.
-    virtual void enableVirtProcElement(unsigned vpe);
-    void enableVPEs(unsigned vpe);
-
-    /// Notify the CPU to disable a virtual processor element.
-    virtual void disableVirtProcElement(ThreadID tid, unsigned vpe);
-    void disableVPEs(ThreadID tid, unsigned vpe);
-
-    /// Notify the CPU that multithreading is enabled.
-    virtual void enableMultiThreading(unsigned vpe);
-    void enableThreads(unsigned vpe);
-
-    /// Notify the CPU that multithreading is disabled.
-    virtual void disableMultiThreading(ThreadID tid, unsigned vpe);
-    void disableThreads(ThreadID tid, unsigned vpe);
-
-    /** Activate a Thread When CPU Resources are Available. */
-    void activateWhenReady(ThreadID tid);
-
-    /** Add or Remove a Thread Context in the CPU. */
-    void doContextSwitch();
-
      /** Update The Order In Which We Process Threads. */
      void updateThreadPriority();
  
@@ -420,7 +421,11 @@ class InOrderCPU : public BaseCPU
      /** Get & Update Next Event Number */
      InstSeqNum getNextEventNum()
      {
+#ifdef DEBUG
          return cpuEventNum++;
+#else
+        return 0;
+#endif
      }
  
      /** Register file accessors  */
@@ -550,8 +555,8 @@ class InOrderCPU : public BaseCPU
       */
      std::queue<ListIt> removeList;
  
-    /** List of all the resource requests that will be removed at the end of this
-     *  cycle.
+    /** List of all the resource requests that will be removed at the end 
+     *  of this cycle.
       */
      std::queue<ResourceRequest*> reqRemoveList;
  
@@ -585,18 +590,19 @@ class InOrderCPU : public BaseCPU
      /** Active Threads List */
      std::list<ThreadID> activeThreads;
  
-    /** Current Threads List */
-    std::list<ThreadID> currentThreads;
+    /** Ready Threads List */
+    std::list<ThreadID> readyThreads;
  
      /** Suspended Threads List */
      std::list<ThreadID> suspendedThreads;
  
-    /** Thread Status Functions (Unused Currently) */
-    bool isThreadInCPU(ThreadID tid);
+    /** Halted Threads List */
+    std::list<ThreadID> haltedThreads;
+
+    /** Thread Status Functions */
      bool isThreadActive(ThreadID tid);
+    bool isThreadReady(ThreadID tid);
      bool isThreadSuspended(ThreadID tid);
-    void addToCurrentThreads(ThreadID tid);
-    void removeFromCurrentThreads(ThreadID tid);
  
    private:
      /** The activity recorder; used to tell if the CPU has any
@@ -609,6 +615,19 @@ class InOrderCPU : public BaseCPU
      /** Number of Active Threads in the CPU */
      ThreadID numActiveThreads() { return activeThreads.size(); }
  
+    /** Thread id of active thread
+     *  Only used for SwitchOnCacheMiss model. 
+     *  Assumes only 1 thread active
+     */
+    ThreadID activeThreadId() 
+    { 
+        if (numActiveThreads() > 0)
+            return activeThreads.front();
+        else
+            return InvalidThreadID;
+    }
+    
+     
      /** Records that there was time buffer activity this cycle. */
      void activityThisCycle() { activityRec.activity(); }
  
@@ -627,13 +646,14 @@ class InOrderCPU : public BaseCPU
      virtual void wakeup();
  #endif
  
-    /** Gets a free thread id. Use if thread ids change across system. */
-    ThreadID getFreeTid();
-
      // LL/SC debug functionality
      unsigned stCondFails;
-    unsigned readStCondFailures() { return stCondFails; }
-    unsigned setStCondFailures(unsigned st_fails) { return stCondFails = st_fails; }
+
+    unsigned readStCondFailures() 
+    { return stCondFails; }
+
+    unsigned setStCondFailures(unsigned st_fails) 
+    { return stCondFails = st_fails; }
  
      /** Returns a pointer to a thread context. */
      ThreadContext *tcBase(ThreadID tid = 0)
@@ -663,9 +683,16 @@ class InOrderCPU : public BaseCPU
      /** The global sequence number counter. */
      InstSeqNum globalSeqNum[ThePipeline::MaxThreads];
  
+#ifdef DEBUG
      /** The global event number counter. */
      InstSeqNum cpuEventNum;
  
+    /** Number of resource requests active in CPU **/
+    unsigned resReqCount;
+
+    Stats::Scalar maxResReqCount;    
+#endif
+
      /** Counter of how many stages have completed switching out. */
      int switchCount;
  
@@ -684,18 +711,14 @@ class InOrderCPU : public BaseCPU
      /** Per-Stage Instruction Tracing */
      bool stageTracing;
  
-    /** Is there a context switch pending? */
-    bool contextSwitch;
-
-    /** Threads Scheduled to Enter CPU */
-    std::list<int> cpuWaitList;
-
      /** The cycle that the CPU was last running, used for statistics. */
      Tick lastRunningCycle;
  
-    /** Number of Virtual Processors the CPU can process */
-    unsigned numVirtProcs;
-
+    void updateContextSwitchStats();    
+    unsigned instsPerSwitch;    
+    Stats::Average instsPerCtxtSwitch;    
+    Stats::Scalar numCtxtSwitches;
+    
      /** Update Thread , used for statistic purposes*/
      inline void tickThreadStats();
  
@@ -708,9 +731,15 @@ class InOrderCPU : public BaseCPU
      /** Stat for total number of times the CPU is descheduled. */
      Stats::Scalar timesIdled;
  
-    /** Stat for total number of cycles the CPU spends descheduled. */
+    /** Stat for cycles the CPU spends descheduled or with no stages active. */
      Stats::Scalar idleCycles;
  
+    /** Stat for total number of cycles the CPU is active. */
+    Stats::Scalar runCycles;
+
+    /** Percentage of cycles a stage was active */
+    Stats::Formula activity;
+
      /** Stat for the number of committed instructions per thread. */
      Stats::Vector committedInsts;
  
diff --git a/src/cpu/inorder/first_stage.cc b/src/cpu/inorder/first_stage.cc

index 8bd703c56f03381ac402deedcc8da71ce6cd66e1..658ce37d39ef11c252a98a50a1b68c672f8b90dd 100644 (file)
--- a/src/cpu/inorder/first_stage.cc
+++ b/src/cpu/inorder/first_stage.cc
@@ -67,11 +67,12 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
  
      // Clear the instruction list and skid buffer in case they have any
      // insts in them.
-    DPRINTF(InOrderStage, "Removing instructions from stage instruction list.\n");
+    DPRINTF(InOrderStage, "Removing instructions from stage instruction "
+            "list.\n");
      while (!insts[tid].empty()) {
          if (insts[tid].front()->seqNum <= squash_seq_num) {
-            DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because it's <= "
-                    "squashing seqNum %i.\n",
+            DPRINTF(InOrderStage,"[tid:%i]: Cannot remove [sn:%i] because "
+                    "it's <= squashing seqNum %i.\n",
                      tid,
                      insts[tid].front()->seqNum,
                      squash_seq_num);
@@ -82,8 +83,9 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
                      insts[tid].size());
              break;
          }
-        DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n",
-                tid, insts[tid].front()->seqNum, insts[tid].front()->PC);
+        DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] "
+                "PC %08p.\n", tid, insts[tid].front()->seqNum, 
+                insts[tid].front()->PC);
          insts[tid].pop();
      }
  
@@ -93,6 +95,18 @@ FirstStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
      cpu->removeInstsUntil(squash_seq_num, tid);
  }
  
+void 
+FirstStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid)
+{
+    // Need to preserve the stalling instruction in first-stage
+    // since the squash() from first stage also removes
+    // the instruction from the CPU (removeInstsUntil). If that
+    // functionality gets changed then you can move this offset.
+    // (stalling instruction = seq_num + 1)
+    squash(seq_num+1, tid);
+}
+
+
  void
  FirstStage::processStage(bool &status_change)
  {
@@ -104,8 +118,9 @@ FirstStage::processStage(bool &status_change)
          status_change =  checkSignalsAndUpdate(tid) || status_change;
      }
  
-    for (int threadFetched = 0; threadFetched < numFetchingThreads;
-         threadFetched++) {
+    for (int insts_fetched = 0; 
+         insts_fetched < stageWidth && canSendInstToStage(1); 
+         insts_fetched++) {
          ThreadID tid = getFetchingThread(fetchPolicy);
  
          if (tid >= 0) {
@@ -115,16 +130,28 @@ FirstStage::processStage(bool &status_change)
              DPRINTF(InOrderStage, "No more threads to fetch from.\n");
          }
      }
+
+    if (instsProcessed > 0) {
+        ++runCycles;
+        idle = false;        
+    } else {
+        ++idleCycles;
+        idle = true;        
+    }
+
  }
  
-//@TODO: Note in documentation, that when you make a pipeline stage change, then
-//make sure you change the first stage too
+//@TODO: Note in documentation, that when you make a pipeline stage change, 
+//then make sure you change the first stage too
  void
  FirstStage::processInsts(ThreadID tid)
  {
      bool all_reqs_completed = true;
  
-    for (int insts_fetched = 0; insts_fetched < stageWidth && canSendInstToStage(1); insts_fetched++) {
+    for (int insts_fetched = 0; 
+         insts_fetched < stageWidth && canSendInstToStage(1); 
+         insts_fetched++) {
+
          DynInstPtr inst;
          bool new_inst = false;
  
@@ -150,26 +177,21 @@ FirstStage::processInsts(ThreadID tid)
              inst->traceData = NULL;
  #endif      // TRACING_ON
  
-            DPRINTF(RefCount, "creation: [tid:%i]: [sn:%i]: Refcount = %i.\n",
-                    inst->readTid(),
-                    inst->seqNum,
-                    0/*inst->curCount()*/);
-
              // Add instruction to the CPU's list of instructions.
              inst->setInstListIt(cpu->addInst(inst));
  
-            DPRINTF(RefCount, "after add to CPU List: [tid:%i]: [sn:%i]: Refcount = %i.\n",
-                    inst->readTid(),
-                    inst->seqNum,
-                    0/*inst->curCount()*/);
-
              // Create Front-End Resource Schedule For Instruction
              ThePipeline::createFrontEndSchedule(inst);
          }
  
-        // Don't let instruction pass to next stage if it hasnt completed
-        // all of it's requests for this stage.
-        all_reqs_completed = processInstSchedule(inst);
+        int reqs_processed = 0;            
+        all_reqs_completed = processInstSchedule(inst, reqs_processed);
+
+        // If the instruction isn't squashed & we've completed at least one
+        // request, then we can officially count this instruction toward the
+        // stage's bandwidth count
+        if (reqs_processed > 0)
+            instsProcessed++;
  
          if (!all_reqs_completed) {
              if (new_inst) {
@@ -184,7 +206,6 @@ FirstStage::processInsts(ThreadID tid)
          }
  
          sendInstToNextStage(inst);
-        //++stageProcessedInsts;
      }
  
      // Record that stage has written to the time buffer for activity
@@ -197,11 +218,12 @@ FirstStage::processInsts(ThreadID tid)
  ThreadID
  FirstStage::getFetchingThread(FetchPriority &fetch_priority)
  {
-    if (numThreads > 1) {
-        switch (fetch_priority) {
+    ThreadID num_active_threads = cpu->numActiveThreads();
  
+    if (num_active_threads > 1) {
+        switch (fetch_priority) {
            case SingleThread:
-            return 0;
+            return cpu->activeThreadId();
  
            case RoundRobin:
              return roundRobin();
@@ -209,7 +231,7 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority)
            default:
              return InvalidThreadID;
          }
-    } else {
+    } else if (num_active_threads == 1) {
          ThreadID tid = *activeThreads->begin();
  
          if (stageStatus[tid] == Running ||
@@ -218,8 +240,9 @@ FirstStage::getFetchingThread(FetchPriority &fetch_priority)
          } else {
              return InvalidThreadID;
          }
-    }
-
+    } else {
+        return InvalidThreadID;
+    }    
  }
  
  ThreadID
diff --git a/src/cpu/inorder/first_stage.hh b/src/cpu/inorder/first_stage.hh

index 2a69678e4750bc39720eec29e63d5dbe305d3869..383b799f3a186fc55dd27b93619bafdadc6d27c8 100644 (file)
--- a/src/cpu/inorder/first_stage.hh
+++ b/src/cpu/inorder/first_stage.hh
@@ -61,6 +61,8 @@ class FirstStage : public PipelineStage {
      /** Squash Instructions Above a Seq. Num */
      void squash(InstSeqNum squash_seq_num, ThreadID tid);
  
+    void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid);
+
      /** There are no insts. coming from previous stages, so there is
       * no need to sort insts here
       */
diff --git a/src/cpu/inorder/inorder_dyn_inst.cc b/src/cpu/inorder/inorder_dyn_inst.cc

index 5ab8396151ec6140588234573b353c0aad277f61..1b55c90e08ac32a5a453f7ad5286041dd99d50cf 100644 (file)
--- a/src/cpu/inorder/inorder_dyn_inst.cc
+++ b/src/cpu/inorder/inorder_dyn_inst.cc
@@ -111,7 +111,13 @@ InOrderDynInst::initVars()
  {
      fetchMemReq = NULL;
      dataMemReq = NULL;
-
+    splitMemData = NULL;
+    split2ndAddr = 0;
+    split2ndAccess = false;
+    splitInst = false;
+    splitInstSked = false;    
+    splitFinishCnt = 0;
+    
      effAddr = 0;
      physEffAddr = 0;
  
@@ -159,7 +165,7 @@ InOrderDynInst::initVars()
  
      // Update Instruction Count for this instruction
      ++instcount;
-    if (instcount > 500) {
+    if (instcount > 100) {
          fatal("Number of Active Instructions in CPU is too high. "
                  "(Not Dereferencing Ptrs. Correctly?)\n");
      }
@@ -170,6 +176,12 @@ InOrderDynInst::initVars()
              threadNumber, seqNum, instcount);
  }
  
+void
+InOrderDynInst::resetInstCount()
+{
+    instcount = 0;
+}
+
  
  InOrderDynInst::~InOrderDynInst()
  {
@@ -187,6 +199,10 @@ InOrderDynInst::~InOrderDynInst()
          delete traceData;
      }
  
+    if (splitMemData) {
+        delete [] splitMemData;
+    }
+    
      fault = NoFault;
  
      --instcount;
@@ -583,30 +599,6 @@ InOrderDynInst::deallocateContext(int thread_num)
      this->cpu->deallocateContext(thread_num);
  }
  
-void
-InOrderDynInst::enableVirtProcElement(unsigned vpe)
-{
-    this->cpu->enableVirtProcElement(vpe);
-}
-
-void
-InOrderDynInst::disableVirtProcElement(unsigned vpe)
-{
-    this->cpu->disableVirtProcElement(threadNumber, vpe);
-}
-
-void
-InOrderDynInst::enableMultiThreading(unsigned vpe)
-{
-    this->cpu->enableMultiThreading(vpe);
-}
-
-void
-InOrderDynInst::disableMultiThreading(unsigned vpe)
-{
-    this->cpu->disableMultiThreading(threadNumber, vpe);
-}
-
  template<class T>
  inline Fault
  InOrderDynInst::read(Addr addr, T &data, unsigned flags)
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh

index 522b4e8d7274160f4003c46adbe6936f8a5a7cf7..8c9cd69e0b1fb5e58c17d7280b8c7b76949786d7 100644 (file)
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -164,6 +164,7 @@ class InOrderDynInst : public FastAlloc, public RefCounted
                                   /// instructions ahead of it
          SerializeAfter,          /// Needs to serialize instructions behind it
          SerializeHandled,        /// Serialization has been handled
+        RemoveList,               /// Is Instruction on Remove List?
          NumStatus
      };
  
@@ -330,6 +331,20 @@ class InOrderDynInst : public FastAlloc, public RefCounted
    public:
      Tick memTime;
  
+    PacketDataPtr splitMemData;
+    RequestPtr splitMemReq;    
+    int splitTotalSize;
+    int split2ndSize;
+    Addr split2ndAddr;
+    bool split2ndAccess;
+    uint8_t split2ndData;
+    PacketDataPtr split2ndDataPtr;
+    unsigned split2ndFlags;
+    bool splitInst;
+    int splitFinishCnt;
+    uint64_t *split2ndStoreDataPtr;    
+    bool splitInstSked;
+
      ////////////////////////////////////////////////////////////
      //
      //  BASE INSTRUCTION INFORMATION.
@@ -468,7 +483,10 @@ class InOrderDynInst : public FastAlloc, public RefCounted
          if (!resSched.empty()) {
              ThePipeline::ScheduleEntry* sked = resSched.top();
              resSched.pop();
-            delete sked;
+            if (sked != 0) {
+                delete sked;
+                
+            }            
          }
      }
  
@@ -515,12 +533,6 @@ class InOrderDynInst : public FastAlloc, public RefCounted
      ////////////////////////////////////////////////////////////
      virtual void deallocateContext(int thread_num);
  
-    virtual void enableVirtProcElement(unsigned vpe);
-    virtual void disableVirtProcElement(unsigned vpe);
-
-    virtual void enableMultiThreading(unsigned vpe);
-    virtual void disableMultiThreading(unsigned vpe);
-
      ////////////////////////////////////////////////////////////
      //
      //  PROGRAM COUNTERS - PC/NPC/NPC
@@ -905,6 +917,12 @@ class InOrderDynInst : public FastAlloc, public RefCounted
      /** Returns whether or not the entry is on the CPU Reg Dep Map */
      bool isRegDepEntry() const { return status[RegDepMapEntry]; }
  
+    /** Sets this instruction as being on the CPU's remove list */
+    void setRemoveList() { status.set(RemoveList); }
+
+    /** Returns whether or not the instruction is on the CPU's remove list */
+    bool isRemoveList() const { return status[RemoveList]; }
+
      /** Sets this instruction as completed. */
      void setCompleted() { status.set(Completed); }
  
@@ -1022,14 +1040,15 @@ class InOrderDynInst : public FastAlloc, public RefCounted
      /** Count of total number of dynamic instructions. */
      static int instcount;
  
+    void resetInstCount();
+    
      /** Dumps out contents of this BaseDynInst. */
      void dump();
  
      /** Dumps out contents of this BaseDynInst into given string. */
      void dump(std::string &outstring);
  
-
-  //inline int curCount() { return curCount(); }
+    //inline int curCount() { return curCount(); }
  };
  
  
diff --git a/src/cpu/inorder/pipeline_stage.cc b/src/cpu/inorder/pipeline_stage.cc

index dc0378bf38efcecd722647682d0607f67b0b4ac6..dcf4d81bf40919ebd338fffd00b4d9a240eec684 100644 (file)
--- a/src/cpu/inorder/pipeline_stage.cc
+++ b/src/cpu/inorder/pipeline_stage.cc
@@ -42,8 +42,11 @@ PipelineStage::PipelineStage(Params *params, unsigned stage_num)
      : stageNum(stage_num), stageWidth(ThePipeline::StageWidth),
        numThreads(ThePipeline::MaxThreads), _status(Inactive),
        stageBufferMax(ThePipeline::interStageBuffSize[stage_num]),
-      prevStageValid(false), nextStageValid(false)
+      prevStageValid(false), nextStageValid(false), idle(false)
  {
+    switchedOutBuffer.resize(ThePipeline::MaxThreads);
+    switchedOutValid.resize(ThePipeline::MaxThreads);
+    
      init(params);
  }
  
@@ -69,41 +72,27 @@ PipelineStage::init(Params *params)
  std::string
  PipelineStage::name() const
  {
-    return cpu->name() + ".stage-" + to_string(stageNum);
+     return cpu->name() + ".stage-" + to_string(stageNum);
  }
  
  
  void
  PipelineStage::regStats()
  {
-/*    stageIdleCycles
-        .name(name() + ".IdleCycles")
-        .desc("Number of cycles stage is idle")
-        .prereq(stageIdleCycles);
-    stageBlockedCycles
-        .name(name() + ".BlockedCycles")
-        .desc("Number of cycles stage is blocked")
-        .prereq(stageBlockedCycles);
-    stageRunCycles
-        .name(name() + ".RunCycles")
-        .desc("Number of cycles stage is running")
-        .prereq(stageRunCycles);
-    stageUnblockCycles
-        .name(name() + ".UnblockCycles")
-        .desc("Number of cycles stage is unblocking")
-        .prereq(stageUnblockCycles);
-    stageSquashCycles
-        .name(name() + ".SquashCycles")
-        .desc("Number of cycles stage is squashing")
-        .prereq(stageSquashCycles);
-    stageProcessedInsts
-        .name(name() + ".ProcessedInsts")
-        .desc("Number of instructions handled by stage")
-        .prereq(stageProcessedInsts);
-    stageSquashedInsts
-        .name(name() + ".SquashedInsts")
-        .desc("Number of squashed instructions handled by stage")
-        .prereq(stageSquashedInsts);*/
+   idleCycles
+        .name(name() + ".idleCycles")
+       .desc("Number of cycles 0 instructions are processed.");
+   
+    runCycles
+        .name(name() + ".runCycles")
+        .desc("Number of cycles 1+ instructions are processed.");
+
+    utilization
+        .name(name() + ".utilization")
+        .desc("Percentage of cycles stage was utilized (processing insts).")
+        .precision(6);
+    utilization = (runCycles / cpu->numCycles) * 100;
+    
  }
  
  
@@ -112,8 +101,6 @@ PipelineStage::setCPU(InOrderCPU *cpu_ptr)
  {
      cpu = cpu_ptr;
  
-    dummyBufferInst = new InOrderDynInst(cpu_ptr, NULL, 0, 0, 0);
-
      DPRINTF(InOrderStage, "Set CPU pointer.\n");
  
      tracer = dynamic_cast<Trace::InOrderTrace *>(cpu->getTracer());
@@ -267,7 +254,8 @@ PipelineStage::isBlocked(ThreadID tid)
  bool
  PipelineStage::block(ThreadID tid)
  {
-    DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to previous stages.\n", tid);
+    DPRINTF(InOrderStage, "[tid:%d]: Blocking, sending block signal back to "
+            "previous stages.\n", tid);
  
      // Add the current inputs to the skid buffer so they can be
      // reprocessed when this stage unblocks.
@@ -296,7 +284,8 @@ PipelineStage::block(ThreadID tid)
  void
  PipelineStage::blockDueToBuffer(ThreadID tid)
  {
-    DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to next stage.\n", tid);
+    DPRINTF(InOrderStage, "[tid:%d]: Blocking instructions from passing to "
+            "next stage.\n", tid);
  
      if (stageStatus[tid] != Blocked) {
          // Set the status to Blocked.
@@ -334,8 +323,9 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid)
  {
      if (cpu->squashSeqNum[tid] < inst->seqNum &&
          cpu->lastSquashCycle[tid] == curTick){
-        DPRINTF(Resource, "Ignoring [sn:%i] squash signal due to another stage's squash "
-                "signal for after [sn:%i].\n", inst->seqNum, cpu->squashSeqNum[tid]);
+        DPRINTF(Resource, "Ignoring [sn:%i] branch squash signal due to "
+                "another stage's squash signal for after [sn:%i].\n", 
+                inst->seqNum, cpu->squashSeqNum[tid]);
      } else {
          // Send back mispredict information.
          toPrevStages->stageInfo[stageNum][tid].branchMispredict = true;
@@ -346,20 +336,28 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid)
  
  
  #if ISA_HAS_DELAY_SLOT
-        toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextNPC() !=
+        toPrevStages->stageInfo[stageNum][tid].branchTaken = 
+            inst->readNextNPC() !=
              (inst->readNextPC() + sizeof(TheISA::MachInst));
-        toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->bdelaySeqNum;
+
+        toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = 
+            inst->bdelaySeqNum;
+
          InstSeqNum squash_seq_num = inst->bdelaySeqNum;
  #else
-        toPrevStages->stageInfo[stageNum][tid].branchTaken = inst->readNextPC() !=
+        toPrevStages->stageInfo[stageNum][tid].branchTaken = 
+            inst->readNextPC() !=
              (inst->readPC() + sizeof(TheISA::MachInst));
+
          toPrevStages->stageInfo[stageNum][tid].bdelayDoneSeqNum = inst->seqNum;
          InstSeqNum squash_seq_num = inst->seqNum;
  #endif
  
-        DPRINTF(InOrderStage, "Target being re-set to %08p\n", inst->readPredTarg());
-        DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], due to [sn:%i] "
-                "branch.\n", tid, squash_seq_num, inst->seqNum);
+        DPRINTF(InOrderStage, "Target being re-set to %08p\n", 
+                inst->readPredTarg());
+        DPRINTF(InOrderStage, "[tid:%i]: Squashing after [sn:%i], "
+                "due to [sn:%i] branch.\n", tid, squash_seq_num, 
+                inst->seqNum);
  
          // Save squash num for later stage use
          cpu->squashSeqNum[tid] = squash_seq_num;
@@ -367,6 +365,12 @@ PipelineStage::squashDueToBranch(DynInstPtr &inst, ThreadID tid)
      }
  }
  
+void
+PipelineStage::squashDueToMemStall(InstSeqNum seq_num, ThreadID tid)
+{
+    squash(seq_num, tid); // forwards to squash(); no stall-specific handling
+}
+
  void
  PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid)
  {
@@ -376,12 +380,15 @@ PipelineStage::squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid)
      for (int i=0; i < prevStage->size; i++) {
          if (prevStage->insts[i]->threadNumber == tid &&
              prevStage->insts[i]->seqNum > squash_seq_num) {
+            // Change Comment to Annulling previous instruction
              DPRINTF(InOrderStage, "[tid:%i]: Squashing instruction, "
                      "[sn:%i] PC %08p.\n",
                      tid,
                      prevStage->insts[i]->seqNum,
                      prevStage->insts[i]->readPC());
              prevStage->insts[i]->setSquashed();
+
+            prevStage->insts[i] = cpu->dummyBufferInst;
          }
      }
  }
@@ -394,18 +401,20 @@ PipelineStage::squash(InstSeqNum squash_seq_num, ThreadID tid)
  
      squashPrevStageInsts(squash_seq_num, tid);
  
-    DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage skidbuffer.\n",
-            tid);
+    DPRINTF(InOrderStage, "[tid:%i]: Removing instructions from incoming stage"
+            " skidbuffer.\n", tid);
      while (!skidBuffer[tid].empty()) {
          if (skidBuffer[tid].front()->seqNum <= squash_seq_num) {
              DPRINTF(InOrderStage, "[tid:%i]: Cannot remove skidBuffer "
-                    "instructions before delay slot [sn:%i]. %i insts"
-                    "left.\n", tid, squash_seq_num,
+                    "instructions (starting w/[sn:%i]) before delay slot "
+                    "[sn:%i]. %i insts left.\n", tid, 
+                    skidBuffer[tid].front()->seqNum, squash_seq_num,
                      skidBuffer[tid].size());
              break;
          }
-        DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] PC %08p.\n",
-                tid, skidBuffer[tid].front()->seqNum, skidBuffer[tid].front()->PC);
+        DPRINTF(InOrderStage, "[tid:%i]: Removing instruction, [sn:%i] "
+                "PC %08p.\n", tid, skidBuffer[tid].front()->seqNum, 
+                skidBuffer[tid].front()->PC);
          skidBuffer[tid].pop();
      }
  
@@ -427,7 +436,8 @@ PipelineStage::stageBufferAvail()
      int avail = stageBufferMax - total -0;// incoming_insts;
  
      if (avail < 0)
-        fatal("stageNum %i:stageBufferAvail() < 0...stBMax=%i,total=%i,incoming=%i=>%i",
+        fatal("stageNum %i:stageBufferAvail() < 0..."
+              "stBMax=%i,total=%i,incoming=%i=>%i",
                stageNum, stageBufferMax, total, incoming_insts, avail);
  
      return avail;
@@ -443,7 +453,8 @@ PipelineStage::canSendInstToStage(unsigned stage_num)
      }
  
      if (!buffer_avail && nextStageQueueValid(stage_num)) {
-        DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", stageNum + 1);
+        DPRINTF(InOrderStall, "STALL: No room in stage %i buffer.\n", 
+                stageNum + 1);
      }
  
      return buffer_avail;
@@ -461,8 +472,9 @@ PipelineStage::skidInsert(ThreadID tid)
  
          assert(tid == inst->threadNumber);
  
-        DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage skidBuffer %i\n",
-                tid, inst->seqNum, inst->readPC(), inst->threadNumber);
+        DPRINTF(InOrderStage,"[tid:%i]: Inserting [sn:%lli] PC:%#x into stage "
+                "skidBuffer %i\n", tid, inst->seqNum, inst->readPC(), 
+                inst->threadNumber);
  
          skidBuffer[tid].push(inst);
      }
@@ -533,6 +545,39 @@ PipelineStage::updateStatus()
      }
  }
  
+void 
+PipelineStage::activateThread(ThreadID tid)
+{    
+    if (cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
+        if (!switchedOutValid[tid]) {
+            DPRINTF(InOrderStage, "[tid:%i] No instruction available in "
+                    "switch out buffer.\n", tid);        
+        } else {
+            DynInstPtr inst = switchedOutBuffer[tid];
+
+            DPRINTF(InOrderStage,"[tid:%i]: Re-Inserting [sn:%lli] PC:%#x into "
+                    "stage skidBuffer %i\n", tid, inst->seqNum, 
+                    inst->readPC(), inst->threadNumber);
+
+            // Make instruction available for pipeline processing
+            skidBuffer[tid].push(inst);            
+
+            // Update PC so that we start fetching after this instruction to prevent
+            // "double"-execution of instructions
+            cpu->resPool->scheduleEvent((InOrderCPU::CPUEventType)
+                                        ResourcePool::UpdateAfterContextSwitch, 
+                                        inst, 0, 0, tid);
+
+            // Clear switchout buffer
+            switchedOutBuffer[tid] = NULL;
+            switchedOutValid[tid] = false;            
+
+            // Update any CPU stats based off context switches
+            cpu->updateContextSwitchStats();            
+        }        
+    }
+    
+}
  
  
  void
@@ -547,16 +592,16 @@ PipelineStage::sortInsts()
          for (int i = 0; i < insts_from_prev_stage; ++i) {
  
              if (prevStage->insts[i]->isSquashed()) {
-                DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], not inserting "
-                        "into stage buffer.\n",
+                DPRINTF(InOrderStage, "[tid:%i]: Ignoring squashed [sn:%i], "
+                        "not inserting into stage buffer.\n",
                      prevStage->insts[i]->readTid(),
                      prevStage->insts[i]->seqNum);
  
                  continue;
              }
  
-            DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage buffer.\n",
-                    prevStage->insts[i]->readTid(),
+            DPRINTF(InOrderStage, "[tid:%i]: Inserting [sn:%i] into stage "
+                    "buffer.\n", prevStage->insts[i]->readTid(),
                      prevStage->insts[i]->seqNum);
  
              ThreadID tid = prevStage->insts[i]->threadNumber;
@@ -565,7 +610,7 @@ PipelineStage::sortInsts()
  
              skidBuffer[tid].push(prevStage->insts[i]);
  
-            prevStage->insts[i] = dummyBufferInst;
+            prevStage->insts[i] = cpu->dummyBufferInst;
  
          }
      }
@@ -611,8 +656,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
      // Check for squash from later pipeline stages
      for (int stage_idx=stageNum; stage_idx < NumStages; stage_idx++) {
          if (fromNextStages->stageInfo[stage_idx][tid].squash) {
-            DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to squash "
-                "from stage %u.\n", tid, stage_idx);
+            DPRINTF(InOrderStage, "[tid:%u]: Squashing instructions due to "
+                    "squash from stage %u.\n", tid, stage_idx);
              InstSeqNum squash_seq_num = fromNextStages->
                  stageInfo[stage_idx][tid].bdelayDoneSeqNum;
              squash(squash_seq_num, tid);
@@ -625,8 +670,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
      }
  
      if (stageStatus[tid] == Blocked) {
-        DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to unblocking.\n",
-                tid);
+        DPRINTF(InOrderStage, "[tid:%u]: Done blocking, switching to "
+                "unblocking.\n", tid);
  
          stageStatus[tid] = Unblocking;
  
@@ -637,15 +682,15 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
  
      if (stageStatus[tid] == Squashing) {
          if (!skidBuffer[tid].empty()) {
-            DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to unblocking.\n",
-                    tid);
+            DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to "
+                    "unblocking.\n", tid);
  
              stageStatus[tid] = Unblocking;
          } else {
              // Switch status to running if stage isn't being told to block or
              // squash this cycle.
-            DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to running.\n",
-                    tid);
+            DPRINTF(InOrderStage, "[tid:%u]: Done squashing, switching to "
+                    "running.\n", tid);
  
              stageStatus[tid] = Running;
          }
@@ -663,6 +708,8 @@ PipelineStage::checkSignalsAndUpdate(ThreadID tid)
  void
  PipelineStage::tick()
  {
+    idle = false;
+    
      wroteToTimeBuffer = false;
  
      bool status_change = false;
@@ -671,9 +718,11 @@ PipelineStage::tick()
          nextStage->size = 0;
  
      toNextStageIndex = 0;
-
+    
      sortInsts();
  
+    instsProcessed = 0;
+
      processStage(status_change);
  
      if (status_change) {
@@ -717,13 +766,13 @@ PipelineStage::unsetResStall(ResReqPtr res_req, ThreadID tid)
      }
  
      if (stalls[tid].resources.size() == 0) {
-        DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource stalls.\n",
-                tid);
+        DPRINTF(InOrderStage, "[tid:%u]: There are no remaining resource "
+                "stalls.\n", tid);
      }
  }
  
-// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be handled
-// one at a time, but instead first come first serve by instruction?
+// @TODO: Update How we handled threads in CPU. Maybe threads shouldnt be 
+// handled one at a time, but instead first come first serve by instruction?
  // Questions are how should a pipeline stage handle thread-specific stalls &
  // pipeline squashes
  void
@@ -746,30 +795,32 @@ PipelineStage::processStage(bool &status_change)
                  nextStage->size, stageNum + 1);
      }
  
+    if (instsProcessed > 0) {
+        ++runCycles;
+        idle = false;        
+    } else {
+        ++idleCycles;        
+        idle = true;        
+    }
+    
      DPRINTF(InOrderStage, "%i left in stage %i incoming buffer.\n", skidSize(),
              stageNum);
  
-    DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", stageBufferAvail(),
-            stageNum);
+    DPRINTF(InOrderStage, "%i available in stage %i incoming buffer.\n", 
+            stageBufferAvail(), stageNum);
  }
  
  void
  PipelineStage::processThread(bool &status_change, ThreadID tid)
  {
      // If status is Running or idle,
-    //     call stageInsts()
+    //     call processInsts()
      // If status is Unblocking,
      //     buffer any instructions coming from fetch
-    //     continue trying to empty skid buffer
+   //     continue trying to empty skid buffer
      //     check if stall conditions have passed
  
-    if (stageStatus[tid] == Blocked) {
-        ;//++stageBlockedCycles;
-    } else if (stageStatus[tid] == Squashing) {
-        ;//++stageSquashCycles;
-    }
-
-    // Stage should try to stage as many instructions as its bandwidth
+    // Stage should try to process as many instructions as its bandwidth
      // will allow, as long as it is not currently blocked.
      if (stageStatus[tid] == Running ||
          stageStatus[tid] == Idle) {
@@ -810,26 +861,22 @@ PipelineStage::processInsts(ThreadID tid)
      if (insts_available == 0) {
          DPRINTF(InOrderStage, "[tid:%u]: Nothing to do, breaking out"
                  " early.\n",tid);
-        // Should I change the status to idle?
-        //++stageIdleCycles;
          return;
      }
  
      DynInstPtr inst;
      bool last_req_completed = true;
  
-    int insts_processed = 0;
-
      while (insts_available > 0 &&
-           insts_processed < stageWidth &&
+           instsProcessed < stageWidth &&
             (!nextStageValid || canSendInstToStage(stageNum+1)) &&
             last_req_completed) {
          assert(!insts_to_stage.empty());
  
          inst = insts_to_stage.front();
  
-        DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] with "
-                "PC %#x\n",
+        DPRINTF(InOrderStage, "[tid:%u]: Processing instruction [sn:%lli] "
+                "with PC %#x\n",
                  tid, inst->seqNum, inst->readPC());
  
          if (inst->isSquashed()) {
@@ -837,8 +884,6 @@ PipelineStage::processInsts(ThreadID tid)
                      "squashed, skipping.\n",
                      tid, inst->seqNum, inst->readPC());
  
-            //++stageSquashedInsts;
-
              insts_to_stage.pop();
  
              --insts_available;
@@ -846,8 +891,14 @@ PipelineStage::processInsts(ThreadID tid)
              continue;
          }
  
+        int reqs_processed = 0;        
+        last_req_completed = processInstSchedule(inst, reqs_processed);
  
-        last_req_completed = processInstSchedule(inst);
+        // If the instruction isn't squashed and we've completed at least one
+        // request, then we can officially count this instruction toward the
+        // stage's bandwidth count
+        if (reqs_processed > 0)
+            instsProcessed++;
  
          // Don't let instruction pass to next stage if it hasnt completed
          // all of it's requests for this stage.
@@ -856,16 +907,13 @@ PipelineStage::processInsts(ThreadID tid)
  
          // Send to Next Stage or Break Loop
          if (nextStageValid && !sendInstToNextStage(inst)) {
-            DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage %i.\n",
-                    tid, inst->seqNum,inst->nextStage);
+            DPRINTF(InOrderStage, "[tid:%i] [sn:%i] unable to proceed to stage"
+                    " %i.\n", tid, inst->seqNum,inst->nextStage);
              break;
          }
  
-        insts_processed++;
-
          insts_to_stage.pop();
  
-        //++stageProcessedInsts;
          --insts_available;
      }
  
@@ -883,12 +931,10 @@ PipelineStage::processInsts(ThreadID tid)
  }
  
  bool
-PipelineStage::processInstSchedule(DynInstPtr inst)
+PipelineStage::processInstSchedule(DynInstPtr inst,int &reqs_processed)
  {
      bool last_req_completed = true;
-#if TRACING_ON
      ThreadID tid = inst->readTid();
-#endif
  
      if (inst->nextResStage() == stageNum) {
          int res_stage_num = inst->nextResStage();
@@ -897,14 +943,15 @@ PipelineStage::processInstSchedule(DynInstPtr inst)
              int res_num = inst->nextResource();
  
  
-            DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s.\n",
-                    tid, inst->seqNum, cpu->resPool->name(res_num));
+            DPRINTF(InOrderStage, "[tid:%i]: [sn:%i]: sending request to %s."
+                    "\n", tid, inst->seqNum, cpu->resPool->name(res_num));
  
              ResReqPtr req = cpu->resPool->request(res_num, inst);
  
              if (req->isCompleted()) {
-                DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s completed.\n",
-                        tid, inst->seqNum, cpu->resPool->name(res_num));
+                DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s "
+                        "completed.\n", tid, inst->seqNum, 
+                        cpu->resPool->name(res_num));
  
                  if (req->fault == NoFault) {
                      inst->popSchedEntry();
@@ -912,12 +959,58 @@ PipelineStage::processInstSchedule(DynInstPtr inst)
                      panic("%i: encountered %s fault!\n",
                            curTick, req->fault->name());
                  }
+
+                reqs_processed++;                
+
+                req->stagePasses++;                
              } else {
-                DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed.\n",
-                        tid, inst->seqNum, cpu->resPool->name(res_num));
+                DPRINTF(InOrderStage, "[tid:%i]: [sn:%i] request to %s failed."
+                        "\n", tid, inst->seqNum, cpu->resPool->name(res_num));
  
                  last_req_completed = false;
  
+                if (req->isMemStall() && 
+                    cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {
+                    // Save Stalling Instruction
+                    DPRINTF(ThreadModel, "[tid:%i] [sn:%i] Detected cache miss.\n", tid, inst->seqNum);
+
+                    DPRINTF(InOrderStage, "Inserting [tid:%i][sn:%i] into switch out buffer.\n",
+                             tid, inst->seqNum);                    
+
+                    switchedOutBuffer[tid] = inst;
+                    switchedOutValid[tid] = true;
+                    
+                    // Remove Thread From Pipeline & Resource Pool
+                    inst->squashingStage = stageNum;         
+                    inst->bdelaySeqNum = inst->seqNum;                               
+                    cpu->squashFromMemStall(inst, tid);  
+
+                    // Switch On Cache Miss
+                    //=====================
+                    // Suspend Thread at end of cycle
+                    DPRINTF(ThreadModel, "Suspending [tid:%i] due to cache miss.\n", tid);
+                    cpu->suspendContext(tid);                    
+
+                    // Activate Next Ready Thread at end of cycle
+                    DPRINTF(ThreadModel, "Attempting to activate next ready thread due to"
+                            " cache miss.\n");
+                    cpu->activateNextReadyContext();                                                                                               
+                }
+                
+                // Mark request for deletion
+                // if it isnt currently being used by a resource
+                if (!req->hasSlot()) {                   
+                    DPRINTF(InOrderStage, "[sn:%i] Deleting Request, has no slot in resource.\n",
+                            inst->seqNum);
+                    
+                    cpu->reqRemoveList.push(req);
+                } else {
+                    DPRINTF(InOrderStage, "[sn:%i] Ignoring Request Deletion, in resource [slot:%i].\n",
+                            inst->seqNum, req->getSlot());
+                    //req = cpu->dummyReq[tid];                    
+                }
+                
+                
                  break;
              }
  
@@ -956,12 +1049,12 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst)
      assert(next_stage >= 1);
      assert(prev_stage >= 0);
  
-    DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to stage %u.\n", tid,
-            stageNum+1);
+    DPRINTF(InOrderStage, "[tid:%u]: Attempting to send instructions to "
+            "stage %u.\n", tid, stageNum+1);
  
      if (!canSendInstToStage(inst->nextStage)) {
-        DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to stage %u.\n", tid,
-            stageNum+1);
+        DPRINTF(InOrderStage, "[tid:%u]: Could not send instruction to "
+                "stage %u.\n", tid, stageNum+1);
          return false;
      }
  
@@ -969,12 +1062,14 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst)
      if (nextStageQueueValid(inst->nextStage - 1)) {
          if (inst->seqNum > cpu->squashSeqNum[tid] &&
              curTick == cpu->lastSquashCycle[tid]) {
-            DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping insertion "
-                    "into stage %i queue.\n", tid, inst->seqNum, inst->nextStage);
+            DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: squashed, skipping "
+                    "insertion into stage %i queue.\n", tid, inst->seqNum, 
+                    inst->nextStage);
          } else {
              if (nextStageValid) {
-                DPRINTF(InOrderStage, "[tid:%u] %i slots available in next stage buffer.\n",
-                    tid, cpu->pipelineStage[next_stage]->stageBufferAvail());
+                DPRINTF(InOrderStage, "[tid:%u] %i slots available in next "
+                        "stage buffer.\n", tid, 
+                        cpu->pipelineStage[next_stage]->stageBufferAvail());
              }
  
              DPRINTF(InOrderStage, "[tid:%u]: [sn:%i]: being placed into  "
@@ -982,11 +1077,13 @@ PipelineStage::sendInstToNextStage(DynInstPtr inst)
                      tid, inst->seqNum, toNextStageIndex,
                      cpu->pipelineStage[prev_stage]->nextStageQueue->id());
  
-            int next_stage_idx = cpu->pipelineStage[prev_stage]->nextStage->size;
+            int next_stage_idx = 
+                cpu->pipelineStage[prev_stage]->nextStage->size;
  
-            // Place instructions in inter-stage communication struct for the next
+            // Place instructions in inter-stage communication struct for next
              // pipeline stage to read next cycle
-            cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] = inst;
+            cpu->pipelineStage[prev_stage]->nextStage->insts[next_stage_idx] 
+                = inst;
  
              ++(cpu->pipelineStage[prev_stage]->nextStage->size);
  
diff --git a/src/cpu/inorder/pipeline_stage.hh b/src/cpu/inorder/pipeline_stage.hh

index 86ee981327925b26ca814e88a843093ebb07d2c9..6c9cf0d9988ff203855e8459263203a94b19d8dc 100644 (file)
--- a/src/cpu/inorder/pipeline_stage.hh
+++ b/src/cpu/inorder/pipeline_stage.hh
@@ -178,7 +178,7 @@ class PipelineStage
      virtual void processInsts(ThreadID tid);
  
      /** Process all resources on an instruction's resource schedule */
-    virtual bool processInstSchedule(DynInstPtr inst);
+    virtual bool processInstSchedule(DynInstPtr inst, int &reqs_processed);
  
      /** Is there room in the next stage buffer for this instruction? */
      virtual bool canSendInstToStage(unsigned stage_num);
@@ -235,11 +235,15 @@ class PipelineStage
  
  
    public:
+    virtual void activateThread(ThreadID tid);
+    
      /** Squashes if there is a PC-relative branch that was predicted
       * incorrectly. Sends squash information back to fetch.
       */
      virtual void squashDueToBranch(DynInstPtr &inst, ThreadID tid);
  
+    virtual void squashDueToMemStall(InstSeqNum seq_num, ThreadID tid);
+
      /** Squash instructions from stage buffer  */
      virtual void squashPrevStageInsts(InstSeqNum squash_seq_num, ThreadID tid);
  
@@ -259,19 +263,33 @@ class PipelineStage
      /** List of active thread ids */
      std::list<ThreadID> *activeThreads;
  
+    /** Buffer of instructions switched out to mem-stall. 
+     *  Only used when using SwitchOnCacheMiss threading model
+     *  Used as 1-to-1 mapping between ThreadID and Entry. 
+     */
+    std::vector<DynInstPtr> switchedOutBuffer;
+    std::vector<bool> switchedOutValid;
+
+    /** Instructions that we've processed this tick
+     *  NOTE: "Processed" means completed at least 1 instruction request 
+     */
+    unsigned instsProcessed;    
+
      /** Queue of all instructions coming from previous stage on this cycle. */
      std::queue<DynInstPtr> insts[ThePipeline::MaxThreads];
  
-    /** Queue of instructions that are finished processing and ready to go next stage.
-     *  This is used to prevent from processing an instrution more than once on any
-     *  stage. NOTE: It is up to the PROGRAMMER must manage this as a queue
+    /** Queue of instructions that are finished processing and ready to go to
+     *  the next stage. This is used to prevent processing an instruction
+     *  more than once in any stage. NOTE: It is up to the PROGRAMMER to
+     *  manage this as a queue.
       */
      std::list<DynInstPtr> instsToNextStage;
  
      /** Skid buffer between previous stage and this one. */
      std::queue<DynInstPtr> skidBuffer[ThePipeline::MaxThreads];
  
-    /** Instruction used to signify that there is no *real* instruction in buffer slot */
+    /** Instruction used to signify that there is no *real* instruction in
+     *  buffer slot */
      DynInstPtr dummyBufferInst;
  
      /** SeqNum of Squashing Branch Delay Instruction (used for MIPS) */
@@ -329,30 +347,27 @@ class PipelineStage
      /** Is Next Stage Valid? */
      bool nextStageValid;
  
+    bool idle;
+    
      /** Source of possible stalls. */
      struct Stalls {
          bool stage[ThePipeline::NumStages];
          std::vector<ResReqPtr> resources;
      };
  
-    /** Tracks which stages are telling decode to stall. */
+    /** Tracks stage/resource stalls */
      Stalls stalls[ThePipeline::MaxThreads];
  
-    //@TODO: Use Stats for the pipeline stages
-    /** Stat for total number of idle cycles. */
-    //Stats::Scalar stageIdleCycles;
-    /** Stat for total number of blocked cycles. */
-    //Stats::Scalar stageBlockedCycles;
-    /** Stat for total number of normal running cycles. */
-    //Stats::Scalar stageRunCycles;
-    /** Stat for total number of unblocking cycles. */
-    //Stats::Scalar stageUnblockCycles;
-    /** Stat for total number of squashing cycles. */
-    //Stats::Scalar stageSquashCycles;
-    /** Stat for total number of staged instructions. */
-    //Stats::Scalar stageProcessedInsts;
-    /** Stat for total number of squashed instructions. */
-    //Stats::Scalar stageSquashedInsts;
+    /** Number of cycles 0 instruction(s) are processed. */
+    Stats::Scalar idleCycles;
+
+    /** Number of cycles 1+ instructions are processed. */
+    Stats::Scalar runCycles;
+
+    /** Percentage of cycles 1+ instructions are processed. */
+    Stats::Formula utilization;
+
+
  };
  
  #endif
diff --git a/src/cpu/inorder/pipeline_traits.cc b/src/cpu/inorder/pipeline_traits.cc

index ed72ab1d076f8381f55cb9c3550823e1f12fe436..8ff26dce2ee0454a52ff668a195fd6a8b044cd43 100644 (file)
--- a/src/cpu/inorder/pipeline_traits.cc
+++ b/src/cpu/inorder/pipeline_traits.cc
@@ -65,16 +65,18 @@ int getNextPriority(DynInstPtr &inst, int stage_num)
  
  void createFrontEndSchedule(DynInstPtr &inst)
  {
-    InstStage *I = inst->addStage();
-    InstStage *E = inst->addStage();
-
-    I->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
-    I->needs(ICache, CacheUnit::InitiateFetch);
-
-    E->needs(ICache, CacheUnit::CompleteFetch);
-    E->needs(Decode, DecodeUnit::DecodeInst);
-    E->needs(BPred, BranchPredictor::PredictBranch);
-    E->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
+    InstStage *F = inst->addStage();
+    InstStage *D = inst->addStage();
+
+    // FETCH
+    F->needs(FetchSeq, FetchSeqUnit::AssignNextPC);
+    F->needs(ICache, CacheUnit::InitiateFetch);
+
+    // DECODE
+    D->needs(ICache, CacheUnit::CompleteFetch);
+    D->needs(Decode, DecodeUnit::DecodeInst);
+    D->needs(BPred, BranchPredictor::PredictBranch);
+    D->needs(FetchSeq, FetchSeqUnit::UpdateTargetPC);
  }
  
  bool createBackEndSchedule(DynInstPtr &inst)
@@ -83,45 +85,48 @@ bool createBackEndSchedule(DynInstPtr &inst)
          return false;
      }
  
-    InstStage *E = inst->currentStage();
+    InstStage *X = inst->addStage();
      InstStage *M = inst->addStage();
-    InstStage *A = inst->addStage();
      InstStage *W = inst->addStage();
  
+    // EXECUTE
      for (int idx=0; idx < inst->numSrcRegs(); idx++) {
          if (!idx || !inst->isStore()) {
-            E->needs(RegManager, UseDefUnit::ReadSrcReg, idx);
+            X->needs(RegManager, UseDefUnit::ReadSrcReg, idx);
          }
      }
  
-
      if ( inst->isNonSpeculative() ) {
          // skip execution of non speculative insts until later
      } else if ( inst->isMemRef() ) {
          if ( inst->isLoad() ) {
-            E->needs(AGEN, AGENUnit::GenerateAddr);
-            E->needs(DCache, CacheUnit::InitiateReadData);
+            X->needs(AGEN, AGENUnit::GenerateAddr);
          }
      } else if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
-        E->needs(MDU, MultDivUnit::StartMultDiv);
+        X->needs(MDU, MultDivUnit::StartMultDiv);
      } else {
-        E->needs(ExecUnit, ExecutionUnit::ExecuteInst);
+        X->needs(ExecUnit, ExecutionUnit::ExecuteInst);
      }
  
      if (inst->opClass() == IntMultOp || inst->opClass() == IntDivOp) {
-        M->needs(MDU, MultDivUnit::EndMultDiv);
+        X->needs(MDU, MultDivUnit::EndMultDiv);
      }
  
+    // MEMORY
      if ( inst->isLoad() ) {
-        M->needs(DCache, CacheUnit::CompleteReadData);
+        M->needs(DCache, CacheUnit::InitiateReadData);
      } else if ( inst->isStore() ) {
          M->needs(RegManager, UseDefUnit::ReadSrcReg, 1);
          M->needs(AGEN, AGENUnit::GenerateAddr);
          M->needs(DCache, CacheUnit::InitiateWriteData);
      }
  
-    if ( inst->isStore() ) {
-        A->needs(DCache, CacheUnit::CompleteWriteData);
+
+    // WRITEBACK
+    if ( inst->isLoad() ) {
+        W->needs(DCache, CacheUnit::CompleteReadData);
+    } else if ( inst->isStore() ) {
+        W->needs(DCache, CacheUnit::CompleteWriteData);
      }
  
      if ( inst->isNonSpeculative() ) {
diff --git a/src/cpu/inorder/pipeline_traits.hh b/src/cpu/inorder/pipeline_traits.hh

index 3c28894e76ebbb9798e977857e79c95d7a63e21f..f039b9e5debaa64242802d8793446f6aee403ceb 100644 (file)
--- a/src/cpu/inorder/pipeline_traits.hh
+++ b/src/cpu/inorder/pipeline_traits.hh
@@ -53,8 +53,8 @@ namespace ThePipeline {
      const unsigned StageWidth = 1;
      const unsigned BackEndStartStage = 2;
  
-    // Enumerated List of Resources The Pipeline Uses
-    enum ResourceList {
+    // List of Resources The Pipeline Uses
+    enum ResourceId {
         FetchSeq = 0,
         ICache,
         Decode,
@@ -94,6 +94,7 @@ namespace ThePipeline {
              stageNum(stage_num), resNum(res_num), cmd(_cmd),
              idx(_idx), priority(_priority)
          { }
+
          virtual ~ScheduleEntry(){}
  
          // Stage number to perform this service.
@@ -113,7 +114,8 @@ namespace ThePipeline {
      };
  
      struct entryCompare {
-        bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) const
+        bool operator()(const ScheduleEntry* lhs, const ScheduleEntry* rhs) 
+            const
          {
              // Prioritize first by stage number that the resource is needed
              if (lhs->stageNum > rhs->stageNum) {
@@ -158,7 +160,6 @@ namespace ThePipeline {
                  stageNum, nextTaskPriority++, unit, request, param
              ));
          }
-
      };
  };
  
diff --git a/src/cpu/inorder/reg_dep_map.cc b/src/cpu/inorder/reg_dep_map.cc

index 51782a588a19b94c953b2c7e0131455f0f86bcae..7fac0a90568fa6a4a85787775e845118658a8ad3 100644 (file)
--- a/src/cpu/inorder/reg_dep_map.cc
+++ b/src/cpu/inorder/reg_dep_map.cc
@@ -235,3 +235,27 @@ RegDepMap::findBypassInst(unsigned idx)
  
      return NULL;
  }
+
+void
+RegDepMap::dump()
+{
+    
+    for (int idx=0; idx < regMap.size(); idx++) {
+        
+        if (regMap[idx].size() > 0) {
+            cprintf("Reg #%i (size:%i): ", idx, regMap[idx].size());
+
+            std::list<DynInstPtr>::iterator list_it = regMap[idx].begin();
+            std::list<DynInstPtr>::iterator list_end = regMap[idx].end();
+        
+            while (list_it != list_end) {
+                cprintf("[sn:%i] ", (*list_it)->seqNum);
+
+                list_it++;            
+            }        
+
+            cprintf("\n");
+        }
+        
+    }    
+}
diff --git a/src/cpu/inorder/reg_dep_map.hh b/src/cpu/inorder/reg_dep_map.hh

index b78e211bbf99eefdb2ce7f68f453816c98f2e938..cb9d35bf4f014ba62e3b3067b0398ea3dbcb7cc8 100644 (file)
--- a/src/cpu/inorder/reg_dep_map.hh
+++ b/src/cpu/inorder/reg_dep_map.hh
@@ -88,6 +88,8 @@ class RegDepMap
      /** Size of Dependency of Map */
      int depSize(unsigned idx);
  
+    void dump();
+    
    protected:
      // Eventually make this a map of lists for
      // efficiency sake!
diff --git a/src/cpu/inorder/resource.cc b/src/cpu/inorder/resource.cc

index cb5681bc1e73ad05d52a3b20fb567baa2716157c..e63925fe89a328a116207e5c885c0132df66d7f2 100644 (file)
--- a/src/cpu/inorder/resource.cc
+++ b/src/cpu/inorder/resource.cc
@@ -47,6 +47,7 @@ Resource::Resource(string res_name, int res_id, int res_width,
  Resource::~Resource()
  {
      delete [] resourceEvent;
+    delete deniedReq;    
  }
  
  
@@ -80,7 +81,9 @@ Resource::regStats()
  {
      instReqsProcessed
          .name(name() + ".instReqsProcessed")
-        .desc("Number of Instructions Requests that completed in this resource.");
+        .desc("Number of Instructions Requests that completed in "
+              "this resource.")
+        .prereq(instReqsProcessed);
  }
  
  int
@@ -98,11 +101,6 @@ Resource::slotsInUse()
  void
  Resource::freeSlot(int slot_idx)
  {
-    DPRINTF(RefCount, "Removing [tid:%i] [sn:%i]'s request from resource [slot:%i].\n",
-            reqMap[slot_idx]->inst->readTid(),
-            reqMap[slot_idx]->inst->seqNum,
-            slot_idx);
-
      // Put slot number on this resource's free list
      availSlots.push_back(slot_idx);
  
@@ -159,7 +157,8 @@ Resource::getSlot(DynInstPtr inst)
  
          while (map_it != map_end) {
              if ((*map_it).second) {
-                DPRINTF(Resource, "Currently Serving request from: [tid:%i] [sn:%i].\n",
+                DPRINTF(Resource, "Currently Serving request from: "
+                        "[tid:%i] [sn:%i].\n",
                          (*map_it).second->getInst()->readTid(),
                          (*map_it).second->getInst()->seqNum);
              }
@@ -176,7 +175,7 @@ Resource::request(DynInstPtr inst)
      // See if the resource is already serving this instruction.
      // If so, use that request;
      bool try_request = false;
-    int slot_num;
+    int slot_num = -1;
      int stage_num;
      ResReqPtr inst_req = findRequest(inst);
  
@@ -202,10 +201,12 @@ Resource::request(DynInstPtr inst)
              inst_req = getRequest(inst, stage_num, id, slot_num, cmd);
  
              if (inst->staticInst) {
-                DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource.\n",
+                DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this "
+                        "resource.\n",
                          inst->readTid(), inst->seqNum);
              } else {
-                DPRINTF(Resource, "[tid:%i]: instruction requesting this resource.\n",
+                DPRINTF(Resource, "[tid:%i]: instruction requesting this "
+                        "resource.\n",
                          inst->readTid());
              }
  
@@ -232,7 +233,8 @@ Resource::requestAgain(DynInstPtr inst, bool &do_request)
      do_request = true;
  
      if (inst->staticInst) {
-        DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource again.\n",
+        DPRINTF(Resource, "[tid:%i]: [sn:%i] requesting this resource "
+                "again.\n",
                  inst->readTid(), inst->seqNum);
      } else {
          DPRINTF(Resource, "[tid:%i]: requesting this resource again.\n",
@@ -254,15 +256,22 @@ Resource::findRequest(DynInstPtr inst)
      map<int, ResReqPtr>::iterator map_it = reqMap.begin();
      map<int, ResReqPtr>::iterator map_end = reqMap.end();
  
+    bool found = false;
+    ResReqPtr req = NULL;
+    
      while (map_it != map_end) {
          if ((*map_it).second &&
-            (*map_it).second->getInst() == inst) {
-            return (*map_it).second;
+            (*map_it).second->getInst() == inst) {            
+            req = (*map_it).second;
+            //return (*map_it).second;
+            assert(found == false);
+            found = true;            
          }
          map_it++;
      }
  
-    return NULL;
+    return req;    
+    //return NULL;
  }
  
  void
@@ -334,6 +343,12 @@ Resource::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
      }
  }
  
+void
+Resource::squashDueToMemStall(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
+                              ThreadID tid)
+{
+    squash(inst, stage_num, squash_seq_num, tid);    
+}
  
  Tick
  Resource::ticks(int num_cycles)
@@ -394,22 +409,72 @@ Resource::unscheduleEvent(DynInstPtr inst)
  
  int ResourceRequest::resReqID = 0;
  
-int ResourceRequest::resReqCount = 0;
+int ResourceRequest::maxReqCount = 0;
+
+ResourceRequest::ResourceRequest(Resource *_res, DynInstPtr _inst, 
+                                 int stage_num, int res_idx, int slot_num, 
+                                 unsigned _cmd)
+    : res(_res), inst(_inst), cmd(_cmd),  stageNum(stage_num),
+      resIdx(res_idx), slotNum(slot_num), completed(false),
+      squashed(false), processing(false), memStall(false)
+{
+#ifdef DEBUG
+        reqID = resReqID++;
+        res->cpu->resReqCount++;
+        DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, 
+                res->cpu->resReqCount);
+
+        if (res->cpu->resReqCount > 100) {
+            fatal("Too many undeleted resource requests. Memory leak?\n");
+        }
+
+        if (res->cpu->resReqCount > maxReqCount) {            
+            maxReqCount = res->cpu->resReqCount;
+            res->cpu->maxResReqCount = maxReqCount;            
+        }
+        
+#endif
+
+        stagePasses = 0;
+        complSlotNum = -1;
+        
+}
+
+ResourceRequest::~ResourceRequest()
+{
+#ifdef DEBUG
+        res->cpu->resReqCount--;
+        DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, 
+                res->cpu->resReqCount);
+#endif
+}
  
  void
  ResourceRequest::done(bool completed)
  {
-    DPRINTF(Resource, "%s done with request from [sn:%i] [tid:%i].\n",
-            res->name(), inst->seqNum, inst->readTid());
+    DPRINTF(Resource, "%s [slot:%i] done with request from [sn:%i] [tid:%i].\n",
+            res->name(), slotNum, inst->seqNum, inst->readTid());
  
      setCompleted(completed);
  
-    // Add to remove list
-    res->cpu->reqRemoveList.push(res->reqMap[slotNum]);
-
+    // Used for debugging purposes
+    if (completed) {
+        complSlotNum = slotNum;
+    
+        // Would like to start a convention such as all requests deleted in resources/pipeline
+        // but a little more complex than it seems...
+        // For now, all COMPLETED requests deleted in resource..
+        //          all FAILED requests deleted in pipeline stage
+        //          *all SQUASHED requests deleted in resource
+        res->cpu->reqRemoveList.push(res->reqMap[slotNum]);
+    }
+    
      // Free Slot So Another Instruction Can Use This Resource
      res->freeSlot(slotNum);
  
+    // change slot # to -1, since we check slotNum to see if request is still valid
+    slotNum = -1;
+        
      res->instReqsProcessed++;
  }
  
diff --git a/src/cpu/inorder/resource.hh b/src/cpu/inorder/resource.hh

index 605b7f690e994b3234f256066c8ff0484b3687fe..b9650df18622ab0af96074317baccdc1779fda43 100644 (file)
--- a/src/cpu/inorder/resource.hh
+++ b/src/cpu/inorder/resource.hh
@@ -70,7 +70,8 @@ class Resource {
      /** Define this function if resource, has a port to connect to an outside
       *  simulation object.
       */
-    virtual Port* getPort(const std::string &if_name, int idx) { return NULL; }
+    virtual Port* getPort(const std::string &if_name, int idx) 
+    { return NULL; }
  
      /** Return ID for this resource */
      int getId() { return id; }
@@ -92,6 +93,14 @@ class Resource {
       */
      virtual void deactivateThread(ThreadID tid);
  
+    /** Resources that care about thread suspension override this. */
+    virtual void suspendThread(ThreadID tid) { }
+    
+    /** Will be called the cycle before a context switch. Any bookkeeping
+     *  needed for the switch can be done here.
+     */
+    virtual void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid) { }    
+
      /** Resources that care when an instruction has been graduated
       *  can override this
       */
@@ -114,9 +123,9 @@ class Resource {
      /** Free a resource slot */
      virtual void freeSlot(int slot_idx);
  
-    /** Request usage of a resource for this instruction. If this instruction already
-     *  has made this request to this resource, and that request is uncompleted
-     *  this function will just return that request
+    /** Request usage of a resource for this instruction. If this instruction 
+     *  already has made this request to this resource, and that request is 
+     *  uncompleted this function will just return that request
       */
      virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
                                          int res_idx, int slot_num,
@@ -155,6 +164,9 @@ class Resource {
      virtual void squash(DynInstPtr inst, int stage_num,
                          InstSeqNum squash_seq_num, ThreadID tid);
  
+    virtual void squashDueToMemStall(DynInstPtr inst, int stage_num,
+                                     InstSeqNum squash_seq_num, ThreadID tid);
+
      /** The number of instructions available that this resource can
       *  can still process
       */
@@ -166,7 +178,8 @@ class Resource {
      /** Schedule resource event, regardless of its current state. */
      void scheduleEvent(int slot_idx, int delay);
  
-    /** Find instruction in list, Schedule resource event, regardless of its current state. */
+    /** Find instruction in list, Schedule resource event, regardless of its 
+     *  current state. */
      bool scheduleEvent(DynInstPtr inst, int delay);
  
      /** Unschedule resource event, regardless of its current state. */
@@ -303,30 +316,14 @@ class ResourceRequest
  
      static int resReqID;
  
-    static int resReqCount;
-
+    static int maxReqCount;
+    
    public:
      ResourceRequest(Resource *_res, DynInstPtr _inst, int stage_num,
-                    int res_idx, int slot_num, unsigned _cmd)
-        : res(_res), inst(_inst), cmd(_cmd),  stageNum(stage_num),
-          resIdx(res_idx), slotNum(slot_num), completed(false),
-          squashed(false), processing(false), waiting(false)
-    {
-        reqID = resReqID++;
-        resReqCount++;
-        DPRINTF(ResReqCount, "Res. Req %i created. resReqCount=%i.\n", reqID, resReqCount);
-
-        if (resReqCount > 100) {
-            fatal("Too many undeleted resource requests. Memory leak?\n");
-        }
-    }
-
-    virtual ~ResourceRequest()
-    {
-        resReqCount--;
-        DPRINTF(ResReqCount, "Res. Req %i deleted. resReqCount=%i.\n", reqID, resReqCount);
-    }
-
+                    int res_idx, int slot_num, unsigned _cmd);
+    
+    virtual ~ResourceRequest();
+    
      int reqID;
  
      /** Acknowledge that this is a request is done and remove
@@ -334,6 +331,8 @@ class ResourceRequest
       */
      void done(bool completed = true);
  
+    short stagePasses;
+    
      /////////////////////////////////////////////
      //
      // GET RESOURCE REQUEST IDENTIFICATION / INFO
@@ -342,8 +341,11 @@ class ResourceRequest
      /** Get Resource Index */
      int getResIdx() { return resIdx; }
  
+       
      /** Get Slot Number */
      int getSlot() { return slotNum; }
+    int getComplSlot() { return complSlotNum; }
+    bool hasSlot()  { return slotNum >= 0; }     
  
      /** Get Stage Number */
      int getStageNum() { return stageNum; }
@@ -366,6 +368,9 @@ class ResourceRequest
      /** Instruction being used */
      DynInstPtr inst;
  
+    /** Not guaranteed to be set, used for debugging */
+    InstSeqNum seqNum;
+    
      /** Fault Associated With This Resource Request */
      Fault fault;
  
@@ -390,8 +395,8 @@ class ResourceRequest
      void setProcessing() { processing = true; }
  
      /** Get/Set IsWaiting variables */
-    bool isWaiting() { return waiting; }
-    void setWaiting() { waiting = true; }
+    bool isMemStall() { return memStall; }
+    void setMemStall(bool stall = true) { memStall = stall; }
  
    protected:
      /** Resource Identification */
@@ -399,12 +404,14 @@ class ResourceRequest
      int stageNum;
      int resIdx;
      int slotNum;
-
-    /** Resource Status */
+    int complSlotNum;
+    
+    /** Resource Request Status */
      bool completed;
      bool squashed;
      bool processing;
-    bool waiting;
+
+    bool memStall;
  };
  
  #endif //__CPU_INORDER_RESOURCE_HH__
diff --git a/src/cpu/inorder/resource_pool.cc b/src/cpu/inorder/resource_pool.cc

index 0d78c232bbd2f26cd7599baf7c9f6e46d32ac563..74bf4f03b78c87e18174fccaf8831ab1a18bdc33 100644 (file)
--- a/src/cpu/inorder/resource_pool.cc
+++ b/src/cpu/inorder/resource_pool.cc
@@ -41,45 +41,74 @@ using namespace ThePipeline;
  ResourcePool::ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params)
      : cpu(_cpu)
  {
-    //@todo: use this function to instantiate the resources in resource pool. This will help in the
-    //auto-generation of this pipeline model.
+    //@todo: use this function to instantiate the resources in resource pool. 
+    //This will help in the auto-generation of this pipeline model.
      //ThePipeline::addResources(resources, memObjects);
  
      // Declare Resource Objects
      // name - id - bandwidth - latency - CPU - Parameters
      // --------------------------------------------------
-    resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, StageWidth * 2, 0, _cpu, params));
+    resources.push_back(new FetchSeqUnit("Fetch-Seq-Unit", FetchSeq, 
+                                         StageWidth * 2, 0, _cpu, params));
  
      memObjects.push_back(ICache);
-    resources.push_back(new CacheUnit("icache_port", ICache, StageWidth * MaxThreads, 0, _cpu, params));
+    resources.push_back(new CacheUnit("icache_port", ICache, 
+                                      StageWidth * MaxThreads, 0, _cpu, 
+                                      params));
  
-    resources.push_back(new DecodeUnit("Decode-Unit", Decode, StageWidth, 0, _cpu, params));
+    resources.push_back(new DecodeUnit("Decode-Unit", Decode, 
+                                       StageWidth, 0, _cpu, params));
  
-    resources.push_back(new BranchPredictor("Branch-Predictor", BPred, StageWidth, 0, _cpu, params));
+    resources.push_back(new BranchPredictor("Branch-Predictor", BPred, 
+                                            StageWidth, 0, _cpu, params));
  
-    resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 0, _cpu, params));
+    resources.push_back(new InstBuffer("Fetch-Buffer-T0", FetchBuff, 4, 
+                                       0, _cpu, params));
  
-    resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, StageWidth * MaxThreads, 0, _cpu, params));
+    resources.push_back(new UseDefUnit("RegFile-Manager", RegManager, 
+                                       StageWidth * MaxThreads, 0, _cpu, 
+                                       params));
  
-    resources.push_back(new AGENUnit("AGEN-Unit", AGEN, StageWidth, 0, _cpu, params));
+    resources.push_back(new AGENUnit("AGEN-Unit", AGEN, 
+                                     StageWidth, 0, _cpu, params));
  
-    resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, StageWidth, 0, _cpu, params));
+    resources.push_back(new ExecutionUnit("Execution-Unit", ExecUnit, 
+                                          StageWidth, 0, _cpu, params));
  
-    resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, params));
+    resources.push_back(new MultDivUnit("Mult-Div-Unit", MDU, 5, 0, _cpu, 
+                                        params));
  
      memObjects.push_back(DCache);
-    resources.push_back(new CacheUnit("dcache_port", DCache, StageWidth * MaxThreads, 0, _cpu, params));
+    resources.push_back(new CacheUnit("dcache_port", DCache, 
+                                      StageWidth * MaxThreads, 0, _cpu, 
+                                      params));
  
-    resources.push_back(new GraduationUnit("Graduation-Unit", Grad, StageWidth * MaxThreads, 0, _cpu, params));
+    resources.push_back(new GraduationUnit("Graduation-Unit", Grad, 
+                                           StageWidth * MaxThreads, 0, _cpu, 
+                                           params));
  
-    resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 0, _cpu, params));
+    resources.push_back(new InstBuffer("Fetch-Buffer-T1", FetchBuff2, 4, 
+                                       0, _cpu, params));
  }
  
+ResourcePool::~ResourcePool()
+{
+    cout << "Deleting resources ..." << endl;
+    
+    for (int i=0; i < resources.size(); i++) {
+        DPRINTF(Resource, "Deleting resource: %s.\n", resources[i]->name());
+        
+        delete resources[i];
+    }    
+}
+
+
  void
  ResourcePool::init()
  {
      for (int i=0; i < resources.size(); i++) {
-        DPRINTF(Resource, "Initializing resource: %s.\n", resources[i]->name());
+        DPRINTF(Resource, "Initializing resource: %s.\n", 
+                resources[i]->name());
          
          resources[i]->init();
      }
@@ -113,8 +142,8 @@ ResourcePool::getPort(const std::string &if_name, int idx)
          int obj_idx = memObjects[i];
          Port *port = resources[obj_idx]->getPort(if_name, idx);
          if (port != NULL) {
-            DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", if_name,
-                    resources[obj_idx]->name(), obj_idx);
+            DPRINTF(Resource, "%s set to resource %s(#%i) in Resource Pool.\n", 
+                    if_name, resources[obj_idx]->name(), obj_idx);
              return port;
          }
      }
@@ -131,7 +160,8 @@ ResourcePool::getPortIdx(const std::string &port_name)
          unsigned obj_idx = memObjects[i];
          Port *port = resources[obj_idx]->getPort(port_name, obj_idx);
          if (port != NULL) {
-            DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, port_name);
+            DPRINTF(Resource, "Returning Port Idx %i for %s.\n", obj_idx, 
+                    port_name);
              return obj_idx;
          }
      }
@@ -151,6 +181,25 @@ ResourcePool::getResIdx(const std::string &res_name)
              return idx;
      }
  
+    panic("Can't find resource idx for: %s\n", res_name);
+    return 0;
+}
+
+unsigned
+ResourcePool::getResIdx(const ThePipeline::ResourceId &res_id)
+{
+    int num_resources = resources.size();
+
+    for (int idx = 0; idx < num_resources; idx++) {
+        if (resources[idx]->getId() == res_id)
+            return idx;
+    }
+
+    // todo: change return value to int and return a -1 here
+    //       maybe even have enumerated type
+    //       panic for now...
+    panic("Can't find resource idx for: %i\n", res_id);
+
      return 0;
  }
  
@@ -167,7 +216,8 @@ void
  ResourcePool::squash(DynInstPtr inst, int res_idx, InstSeqNum done_seq_num,
                       ThreadID tid)
  {
-    resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, tid);
+    resources[res_idx]->squash(inst, ThePipeline::NumStages-1, done_seq_num, 
+                               tid);
  }
  
  int
@@ -182,6 +232,12 @@ ResourcePool::slotsInUse(int res_idx)
      return resources[res_idx]->slotsInUse();
  }
  
+//@todo: split this function and call this version schedulePoolEvent
+//       and use this scheduleEvent for scheduling a specific event on 
+//       a resource
+//@todo: For arguments that aren't being used in a ResPoolEvent, a dummyParam
+//       or some typedef can be used to signify what's important info
+//       to the event construction
  void
  ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
                              int delay,  int res_idx, ThreadID tid)
@@ -192,23 +248,45 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
      {
        case InOrderCPU::ActivateThread:
          {
-            DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event for tick %i.\n",
-                    curTick + delay);
-            ResPoolEvent *res_pool_event = new ResPoolEvent(this,
-                                                            e_type,
-                                                            inst,
-                                                            inst->squashingStage,
-                                                            inst->bdelaySeqNum,
-                                                            inst->readTid());
-            mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
+            DPRINTF(Resource, "Scheduling Activate Thread Resource Pool Event "
+                    "for tick %i, [tid:%i].\n", curTick + delay, 
+                    inst->readTid());
+            ResPoolEvent *res_pool_event = 
+                new ResPoolEvent(this,
+                                 e_type,
+                                 inst,
+                                 inst->squashingStage,
+                                 inst->bdelaySeqNum,
+                                 inst->readTid());
+            mainEventQueue.schedule(res_pool_event, 
+                                    curTick + cpu->ticks(delay));
+        }
+        break;
+
+      case InOrderCPU::HaltThread:
+      case InOrderCPU::DeactivateThread:
+        {
+
+            DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool "
+                    "Event for tick %i.\n", curTick + delay);
+            ResPoolEvent *res_pool_event = 
+                new ResPoolEvent(this,
+                                 e_type,
+                                 inst,
+                                 inst->squashingStage,
+                                 inst->bdelaySeqNum,
+                                 tid);
+
+            mainEventQueue.schedule(res_pool_event, 
+                                    curTick + cpu->ticks(delay));
+
          }
          break;
  
        case InOrderCPU::SuspendThread:
-      case InOrderCPU::DeallocateThread:
          {
  
-            DPRINTF(Resource, "Scheduling Deactivate Thread Resource Pool Event for tick %i.\n",
+            DPRINTF(Resource, "Scheduling Suspend Thread Resource Pool Event for tick %i.\n",
                      curTick + delay);
              ResPoolEvent *res_pool_event = new ResPoolEvent(this,
                                                              e_type,
@@ -224,34 +302,68 @@ ResourcePool::scheduleEvent(InOrderCPU::CPUEventType e_type, DynInstPtr inst,
  
        case ResourcePool::InstGraduated:
          {
-            DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool Event for tick %i.\n",
-                    curTick + delay);
-            ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type,
-                                                            inst,
-                                                            inst->squashingStage,
-                                                            inst->seqNum,
-                                                            inst->readTid());
-            mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
+            DPRINTF(Resource, "Scheduling Inst-Graduated Resource Pool "
+                    "Event for tick %i.\n", curTick + delay);
+            ResPoolEvent *res_pool_event = 
+                new ResPoolEvent(this,e_type,
+                                 inst,
+                                 inst->squashingStage,
+                                 inst->seqNum,
+                                 inst->readTid());
+            mainEventQueue.schedule(res_pool_event, 
+                                    curTick + cpu->ticks(delay));
  
          }
          break;
  
        case ResourcePool::SquashAll:
          {
-            DPRINTF(Resource, "Scheduling Squash Resource Pool Event for tick %i.\n",
+            DPRINTF(Resource, "Scheduling Squash Resource Pool Event for "
+                    "tick %i.\n", curTick + delay);
+            ResPoolEvent *res_pool_event = 
+                new ResPoolEvent(this,e_type,
+                                 inst,
+                                 inst->squashingStage,
+                                 inst->bdelaySeqNum,
+                                 inst->readTid());
+            mainEventQueue.schedule(res_pool_event, 
+                                    curTick + cpu->ticks(delay));
+        }
+        break;
+
+      case InOrderCPU::SquashFromMemStall:
+        {
+            DPRINTF(Resource, "Scheduling Squash Due to Memory Stall Resource "
+                    "Pool Event for tick %i.\n",
+                    curTick + delay);
+            ResPoolEvent *res_pool_event = 
+                new ResPoolEvent(this,e_type,
+                                 inst,
+                                 inst->squashingStage,
+                                 inst->seqNum - 1,
+                                 inst->readTid());
+            mainEventQueue.schedule(res_pool_event, 
+                                    curTick + cpu->ticks(delay));
+        }
+        break;
+
+      case ResourcePool::UpdateAfterContextSwitch:
+        {
+            DPRINTF(Resource, "Scheduling UpdatePC Resource Pool Event for tick %i.\n",
                      curTick + delay);
              ResPoolEvent *res_pool_event = new ResPoolEvent(this,e_type,
                                                              inst,
                                                              inst->squashingStage,
-                                                            inst->bdelaySeqNum,
+                                                            inst->seqNum,
                                                              inst->readTid());
              mainEventQueue.schedule(res_pool_event, curTick + cpu->ticks(delay));
+
          }
          break;
  
        default:
-        DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", InOrderCPU::eventNames[e_type]);
-        ; // If Resource Pool doesnt recognize event, we ignore it.
+        DPRINTF(Resource, "Ignoring Unrecognized CPU Event (%s).\n", 
+                InOrderCPU::eventNames[e_type]);
      }
  }
  
@@ -265,8 +377,9 @@ void
  ResourcePool::squashAll(DynInstPtr inst, int stage_num,
                          InstSeqNum done_seq_num, ThreadID tid)
  {
-    DPRINTF(Resource, "[tid:%i] Stage %i squashing all instructions above [sn:%i].\n",
-            stage_num, tid, done_seq_num);
+    DPRINTF(Resource, "[tid:%i] Broadcasting Squash All Event "
+            " starting w/stage %i for all instructions above [sn:%i].\n",
+             tid, stage_num, done_seq_num);
  
      int num_resources = resources.size();
  
@@ -276,23 +389,49 @@ ResourcePool::squashAll(DynInstPtr inst, int stage_num,
  }
  
  void
-ResourcePool::activateAll(ThreadID tid)
+ResourcePool::squashDueToMemStall(DynInstPtr inst, int stage_num,
+                             InstSeqNum done_seq_num, ThreadID tid)
  {
-    DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all resources.\n",
-            tid);
+    DPRINTF(Resource, "[tid:%i] Broadcasting SquashDueToMemStall Event"
+            " starting w/stage %i for all instructions above [sn:%i].\n",
+            tid, stage_num, done_seq_num);
  
      int num_resources = resources.size();
  
      for (int idx = 0; idx < num_resources; idx++) {
-        resources[idx]->activateThread(tid);
+        resources[idx]->squashDueToMemStall(inst, stage_num, done_seq_num, 
+                                            tid);
      }
  }
  
+void
+ResourcePool::activateAll(ThreadID tid)
+{
+    bool do_activate = cpu->threadModel != InOrderCPU::SwitchOnCacheMiss ||
+        cpu->numActiveThreads() < 1 ||
+        cpu->activeThreadId() == tid;
+    
+        
+    if (do_activate) {
+        DPRINTF(Resource, "[tid:%i] Broadcasting Thread Activation to all "
+                    "resources.\n", tid);
+ 
+        int num_resources = resources.size();
+ 
+        for (int idx = 0; idx < num_resources; idx++) {
+            resources[idx]->activateThread(tid);
+        }
+    } else {
+        DPRINTF(Resource, "[tid:%i] Ignoring Thread Activation to all "
+                    "resources.\n", tid);
+     }
+}
+
  void
  ResourcePool::deactivateAll(ThreadID tid)
  {
-    DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all resources.\n",
-            tid);
+    DPRINTF(Resource, "[tid:%i] Broadcasting Thread Deactivation to all "
+            "resources.\n", tid);
  
      int num_resources = resources.size();
  
@@ -301,11 +440,24 @@ ResourcePool::deactivateAll(ThreadID tid)
      }
  }
  
+void
+ResourcePool::suspendAll(ThreadID tid)
+{
+    DPRINTF(Resource, "[tid:%i] Broadcasting Thread Suspension to all resources.\n",
+            tid);
+
+    int num_resources = resources.size();
+
+    for (int idx = 0; idx < num_resources; idx++) {
+        resources[idx]->suspendThread(tid);
+    }
+}
+
  void
  ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid)
  {
-    DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all resources.\n",
-            tid, seq_num);
+    DPRINTF(Resource, "[tid:%i] Broadcasting [sn:%i] graduation to all "
+            "resources.\n", tid, seq_num);
  
      int num_resources = resources.size();
  
@@ -314,10 +466,18 @@ ResourcePool::instGraduated(InstSeqNum seq_num, ThreadID tid)
      }
  }
  
-ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool)
-    : Event(CPU_Tick_Pri), resPool(_resPool),
-      eventType((InOrderCPU::CPUEventType) Default)
-{ }
+void
+ResourcePool::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
+{
+    DPRINTF(Resource, "[tid:%i] Broadcasting Update PC to all resources.\n",
+            tid);
+
+    int num_resources = resources.size();
+
+    for (int idx = 0; idx < num_resources; idx++) {
+        resources[idx]->updateAfterContextSwitch(inst, tid);
+    }
+}
  
  ResourcePool::ResPoolEvent::ResPoolEvent(ResourcePool *_resPool,
                                           InOrderCPU::CPUEventType e_type,
@@ -340,11 +500,15 @@ ResourcePool::ResPoolEvent::process()
          resPool->activateAll(tid);
          break;
  
-      case InOrderCPU::SuspendThread:
-      case InOrderCPU::DeallocateThread:
+      case InOrderCPU::DeactivateThread:
+      case InOrderCPU::HaltThread:
          resPool->deactivateAll(tid);
          break;
  
+      case InOrderCPU::SuspendThread:
+        resPool->suspendAll(tid);
+        break;
+
        case ResourcePool::InstGraduated:
          resPool->instGraduated(seqNum, tid);
          break;
@@ -353,6 +517,14 @@ ResourcePool::ResPoolEvent::process()
          resPool->squashAll(inst, stageNum, seqNum, tid);
          break;
  
+      case InOrderCPU::SquashFromMemStall:
+        resPool->squashDueToMemStall(inst, stageNum, seqNum, tid);
+        break;
+
+      case ResourcePool::UpdateAfterContextSwitch:
+        resPool->updateAfterContextSwitch(inst, tid);
+        break;
+
        default:
          fatal("Unrecognized Event Type");
      }
diff --git a/src/cpu/inorder/resource_pool.hh b/src/cpu/inorder/resource_pool.hh

index 016fae2bf83c3ad8466fb865b584d7176a30e3f4..60d35ab6136358adc1249c4677e8375ce4897b56 100644 (file)
--- a/src/cpu/inorder/resource_pool.hh
+++ b/src/cpu/inorder/resource_pool.hh
@@ -63,6 +63,7 @@ class ResourcePool {
      enum ResPoolEventType {
          InstGraduated = InOrderCPU::NumCPUEvents,
          SquashAll,
+        UpdateAfterContextSwitch,
          Default
      };
  
@@ -84,9 +85,6 @@ class ResourcePool {
          ThreadID tid;
  
        public:
-        /** Constructs a resource event. */
-        ResPoolEvent(ResourcePool *_resPool);
-
          /** Constructs a resource event. */
          ResPoolEvent(ResourcePool *_resPool,
                       InOrderCPU::CPUEventType e_type,
@@ -123,8 +121,8 @@ class ResourcePool {
      };
  
    public:
-  ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params);
-    virtual ~ResourcePool() {}
+    ResourcePool(InOrderCPU *_cpu, ThePipeline::Params *params);
+    ~ResourcePool();    
  
      std::string name();
  
@@ -143,6 +141,7 @@ class ResourcePool {
  
      /** Returns a specific resource. */
      unsigned getResIdx(const std::string &res_name);
+    unsigned getResIdx(const ThePipeline::ResourceId &res_id);
  
      /** Returns a pointer to a resource */
      Resource* getResource(int res_idx) { return resources[res_idx]; }
@@ -160,12 +159,24 @@ class ResourcePool {
      void squashAll(DynInstPtr inst, int stage_num,
                     InstSeqNum done_seq_num, ThreadID tid);
  
+    /** Squash Resources in Pool after a memory stall 
+     *  NOTE: Only use during Switch-On-Miss Thread model
+     */    
+    void squashDueToMemStall(DynInstPtr inst, int stage_num,
+                             InstSeqNum done_seq_num, ThreadID tid);
+
      /** Activate Thread in all resources */
      void activateAll(ThreadID tid);
  
      /** De-Activate Thread in all resources */
      void deactivateAll(ThreadID tid);
  
+    /** Suspend Thread in all resources */
+    void suspendAll(ThreadID tid);
+
+    /** Broadcast Context Switch Update to all resources */
+    void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid);
+
      /** Broadcast graduation to all resources */
      void instGraduated(InstSeqNum seq_num, ThreadID tid);
  
diff --git a/src/cpu/inorder/resources/cache_unit.cc b/src/cpu/inorder/resources/cache_unit.cc

index eb66e10f82a88a1c036a64d9b0ec9f1d77c1c063..cb1861ea9984824975379e5a1feda2b35c9e2b7a 100644 (file)
--- a/src/cpu/inorder/resources/cache_unit.cc
+++ b/src/cpu/inorder/resources/cache_unit.cc
@@ -40,6 +40,7 @@
  #include "cpu/inorder/resources/cache_unit.hh"
  #include "cpu/inorder/pipeline_traits.hh"
  #include "cpu/inorder/cpu.hh"
+#include "cpu/inorder/resource_pool.hh"
  #include "mem/request.hh"
  
  using namespace std;
@@ -49,14 +50,14 @@ using namespace ThePipeline;
  Tick
  CacheUnit::CachePort::recvAtomic(PacketPtr pkt)
  {
-    panic("DefaultFetch doesn't expect recvAtomic callback!");
+    panic("CacheUnit::CachePort doesn't expect recvAtomic callback!");
      return curTick;
  }
  
  void
  CacheUnit::CachePort::recvFunctional(PacketPtr pkt)
  {
-    panic("DefaultFetch doesn't expect recvFunctional callback!");
+    panic("CacheUnit::CachePort doesn't expect recvFunctional callback!");
  }
  
  void
@@ -65,7 +66,7 @@ CacheUnit::CachePort::recvStatusChange(Status status)
      if (status == RangeChange)
          return;
  
-    panic("DefaultFetch doesn't expect recvStatusChange callback!");
+    panic("CacheUnit::CachePort doesn't expect recvStatusChange callback!");
  }
  
  bool
@@ -84,8 +85,7 @@ CacheUnit::CachePort::recvRetry()
  CacheUnit::CacheUnit(string res_name, int res_id, int res_width,
          int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params)
      : Resource(res_name, res_id, res_width, res_latency, _cpu),
-      retryPkt(NULL), retrySlot(-1), cacheBlocked(false),
-      predecoder(NULL)
+      cachePortBlocked(false), predecoder(NULL)
  {
      cachePort = new CachePort(this);
  
@@ -131,18 +131,24 @@ CacheUnit::init()
  int
  CacheUnit::getSlot(DynInstPtr inst)
  {
+    ThreadID tid = inst->readTid();
+    
      if (tlbBlocked[inst->threadNumber]) {
          return -1;
      }
  
-    if (!inst->validMemAddr()) {
-        panic("Mem. Addr. must be set before requesting cache access\n");
+    // For a Split-Load, the instruction would have processed once already
+    // causing the address to be unset.
+    if (!inst->validMemAddr() && !inst->splitInst) {
+        panic("[tid:%i][sn:%i] Mem. Addr. must be set before requesting cache access\n",
+              inst->readTid(), inst->seqNum);
      }
  
      Addr req_addr = inst->getMemAddr();
  
      if (resName == "icache_port" ||
-        find(addrList.begin(), addrList.end(), req_addr) == addrList.end()) {
+        find(addrList[tid].begin(), addrList[tid].end(), req_addr) == 
+        addrList[tid].end()) {
  
          int new_slot = Resource::getSlot(inst);
  
@@ -150,37 +156,115 @@ CacheUnit::getSlot(DynInstPtr inst)
              return -1;
  
          inst->memTime = curTick;
-        addrList.push_back(req_addr);
-        addrMap[req_addr] = inst->seqNum;
-        DPRINTF(InOrderCachePort,
-                "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
-                inst->readTid(), inst->seqNum, req_addr);
+        setAddrDependency(inst);            
          return new_slot;
      } else {
-        DPRINTF(InOrderCachePort,
-                "Denying request because there is an outstanding"
+        // Allow the same instruction multiple accesses to the same address;
+        // this should only happen (maybe) when a squashed inst. must replay
+        if (addrMap[tid][req_addr] == inst->seqNum) {
+            int new_slot = Resource::getSlot(inst);
+        
+            if (new_slot == -1)
+                return -1;     
+
+            return new_slot;       
+        } else {                    
+            DPRINTF(InOrderCachePort,
+                "[tid:%i] Denying request because there is an outstanding"
                  " request to/for addr. %08p. by [sn:%i] @ tick %i\n",
-                req_addr, addrMap[req_addr], inst->memTime);
-        return -1;
+                inst->readTid(), req_addr, addrMap[tid][req_addr], inst->memTime);
+            return -1;
+        }        
      }
+
+    return -1;   
  }
  
  void
-CacheUnit::freeSlot(int slot_num)
+CacheUnit::setAddrDependency(DynInstPtr inst)
  {
-    vector<Addr>::iterator vect_it = find(addrList.begin(), addrList.end(),
-            reqMap[slot_num]->inst->getMemAddr());
-    assert(vect_it != addrList.end());
+    Addr req_addr = inst->getMemAddr();
+    ThreadID tid = inst->readTid();
  
+    addrList[tid].push_back(req_addr);
+    addrMap[tid][req_addr] = inst->seqNum;
      DPRINTF(InOrderCachePort,
-            "[tid:%i]: Address %08p removed from dependency list\n",
-            reqMap[slot_num]->inst->readTid(), (*vect_it));
+            "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
+            inst->readTid(), inst->seqNum, req_addr);
+    DPRINTF(AddrDep,
+            "[tid:%i]: [sn:%i]: Address %08p added to dependency list\n",
+            inst->readTid(), inst->seqNum, req_addr);
+}
+
+void
+CacheUnit::removeAddrDependency(DynInstPtr inst)
+{
+    ThreadID tid = inst->readTid();
+
+    Addr mem_addr = inst->getMemAddr();
+    
+    // Erase from Address List
+    vector<Addr>::iterator vect_it = find(addrList[tid].begin(), addrList[tid].end(),
+                                          mem_addr);
+    assert(vect_it != addrList[tid].end() || inst->splitInst);
+
+    if (vect_it != addrList[tid].end()) {
+        DPRINTF(AddrDep,
+                "[tid:%i]: [sn:%i] Address %08p removed from dependency list\n",
+                inst->readTid(), inst->seqNum, (*vect_it));
+
+        addrList[tid].erase(vect_it);
+
+        // Erase From Address Map (Used for Debugging)
+        addrMap[tid].erase(addrMap[tid].find(mem_addr));
+    }
+    
+
+}
+
+ResReqPtr
+CacheUnit::findRequest(DynInstPtr inst)
+{
+    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
+    map<int, ResReqPtr>::iterator map_end = reqMap.end();
+
+    while (map_it != map_end) {
+        CacheRequest* cache_req = dynamic_cast<CacheRequest*>((*map_it).second);
+        assert(cache_req);
+
+        if (cache_req &&
+            cache_req->getInst() == inst &&
+            cache_req->instIdx == inst->resSched.top()->idx) {
+            return cache_req;
+        }
+        map_it++;
+    }
+
+    return NULL;
+}
+
+ResReqPtr
+CacheUnit::findSplitRequest(DynInstPtr inst, int idx)
+{
+    map<int, ResReqPtr>::iterator map_it = reqMap.begin();
+    map<int, ResReqPtr>::iterator map_end = reqMap.end();
+
+    while (map_it != map_end) {
+        CacheRequest* cache_req = dynamic_cast<CacheRequest*>((*map_it).second);
+        assert(cache_req);
  
-    addrList.erase(vect_it);
+        if (cache_req &&
+            cache_req->getInst() == inst &&
+            cache_req->instIdx == idx) {
+            return cache_req;
+        }
+        map_it++;
+    }
  
-    Resource::freeSlot(slot_num);
+    return NULL;
  }
  
+
  ResReqPtr
  CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                       int slot_num, unsigned cmd)
@@ -195,6 +279,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
  
      switch (sched_entry->cmd)
      {
+      case InitSecondSplitRead:
+        pkt_cmd = MemCmd::ReadReq;
+
+        DPRINTF(InOrderCachePort,
+                "[tid:%i]: Read request from [sn:%i] for addr %08p\n",
+                inst->readTid(), inst->seqNum, inst->split2ndAddr);
+        break;
+
        case InitiateReadData:
          pkt_cmd = MemCmd::ReadReq;
  
@@ -203,6 +295,14 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                  inst->readTid(), inst->seqNum, inst->getMemAddr());
          break;
  
+      case InitSecondSplitWrite:
+        pkt_cmd = MemCmd::WriteReq;
+
+        DPRINTF(InOrderCachePort,
+                "[tid:%i]: Write request from [sn:%i] for addr %08p\n",
+                inst->readTid(), inst->seqNum, inst->split2ndAddr);
+        break;
+
        case InitiateWriteData:
          pkt_cmd = MemCmd::WriteReq;
  
@@ -226,7 +326,8 @@ CacheUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
  
      return new CacheRequest(this, inst, stage_num, id, slot_num,
                              sched_entry->cmd, 0, pkt_cmd,
-                            0/*flags*/, this->cpu->readCpuId());
+                            0/*flags*/, this->cpu->readCpuId(),
+                            inst->resSched.top()->idx);
  }
  
  void
@@ -237,15 +338,17 @@ CacheUnit::requestAgain(DynInstPtr inst, bool &service_request)
  
      // Check to see if this instruction is requesting the same command
      // or a different one
-    if (cache_req->cmd != inst->resSched.top()->cmd) {
+    if (cache_req->cmd != inst->resSched.top()->cmd &&
+        cache_req->instIdx == inst->resSched.top()->idx) {
          // If different, then update command in the request
          cache_req->cmd = inst->resSched.top()->cmd;
          DPRINTF(InOrderCachePort,
-                "[tid:%i]: [sn:%i]: Updating the command for this instruction\n",
-                inst->readTid(), inst->seqNum);
+                "[tid:%i]: [sn:%i]: Updating the command for this "
+                "instruction\n ", inst->readTid(), inst->seqNum);
  
          service_request = true;
-    } else {
+    } else if (inst->resSched.top()->idx != CacheUnit::InitSecondSplitRead &&
+               inst->resSched.top()->idx != CacheUnit::InitSecondSplitWrite) {        
          // If same command, just check to see if memory access was completed
          // but dont try to re-execute
          DPRINTF(InOrderCachePort,
@@ -271,12 +374,25 @@ CacheUnit::doTLBAccess(DynInstPtr inst, CacheReqPtr cache_req, int acc_size,
                                              cpu->readCpuId(), inst->readTid());
              cache_req->memReq = inst->fetchMemReq;
      } else {
-            inst->dataMemReq = new Request(inst->readTid(), aligned_addr,
+        if (!cache_req->is2ndSplit()) {            
+            inst->dataMemReq = new Request(cpu->asid[tid], aligned_addr,
                                             acc_size, flags, inst->readPC(),
                                             cpu->readCpuId(), inst->readTid());
              cache_req->memReq = inst->dataMemReq;
+        } else {
+            assert(inst->splitInst);
+            
+            inst->splitMemReq = new Request(cpu->asid[tid], 
+                                            inst->split2ndAddr,
+                                            acc_size, 
+                                            flags, 
+                                            inst->readPC(),
+                                            cpu->readCpuId(), 
+                                            tid);
+            cache_req->memReq = inst->splitMemReq;            
+        }
      }
-
+    
  
      cache_req->fault =
          _tlb->translateAtomic(cache_req->memReq,
@@ -311,14 +427,93 @@ Fault
  CacheUnit::read(DynInstPtr inst, Addr addr, T &data, unsigned flags)
  {
      CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
-    assert(cache_req);
+    assert(cache_req && "Can't Find Instruction for Read!");
+
+    // The block size of our peer
+    unsigned blockSize = this->cachePort->peerBlockSize();
+
+    //The size of the data we're trying to read.
+    int dataSize = sizeof(T);
+
+    if (inst->split2ndAccess) {     
+        dataSize = inst->split2ndSize;
+        cache_req->splitAccess = true;        
+        cache_req->split2ndAccess = true;
+        
+        DPRINTF(InOrderCachePort, "[sn:%i] Split Read Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, 
+                inst->getMemAddr(), inst->split2ndAddr);       
+    }  
+    
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+
+    
+    if (secondAddr > addr && !inst->split2ndAccess) {
+        DPRINTF(InOrderCachePort, "%i: sn[%i] Split Read Access (1 of 2) for (%#x, %#x).\n", curTick, inst->seqNum, 
+                addr, secondAddr);       
+        
+        // Save All "Total" Split Information
+        // ==============================
+        inst->splitInst = true;        
+        inst->splitMemData = new uint8_t[dataSize];
+        inst->splitTotalSize = dataSize;
+        
+        if (!inst->splitInstSked) {
+            // Schedule Split Read/Complete for Instruction
+            // ==============================
+            int stage_num = cache_req->getStageNum();
+        
+            int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
+        
+            inst->resSched.push(new ScheduleEntry(stage_num, 
+                                                  stage_pri, 
+                                                  cpu->resPool->getResIdx(DCache),
+                                                  CacheUnit::InitSecondSplitRead,
+                                                  1)
+                );
+
+            inst->resSched.push(new ScheduleEntry(stage_num + 1, 
+                                                  1/*stage_pri*/, 
+                                                  cpu->resPool->getResIdx(DCache),
+                                                  CacheUnit::CompleteSecondSplitRead, 
+                                                  1)
+                );
+            inst->splitInstSked = true;
+        } else {
+            DPRINTF(InOrderCachePort, "[tid:%i] [sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", 
+                    inst->readTid(), inst->seqNum, addr, secondAddr);                   
+        }
  
-    int acc_size =  sizeof(T);
-    doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Read);
+        // Split Information for First Access
+        // ==============================
+        dataSize = secondAddr - addr;
+        cache_req->splitAccess = true;
+
+        // Split Information for Second Access
+        // ==============================
+        inst->split2ndSize = addr + sizeof(T) - secondAddr;
+        inst->split2ndAddr = secondAddr;            
+        inst->split2ndDataPtr = inst->splitMemData + dataSize;            
+        inst->split2ndFlags = flags;        
+    }
+    
+    doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Read);
  
      if (cache_req->fault == NoFault) {
-        cache_req->reqData = new uint8_t[acc_size];
-        doCacheAccess(inst, NULL);
+        if (!cache_req->splitAccess) {            
+            cache_req->reqData = new uint8_t[dataSize];
+            doCacheAccess(inst, NULL);
+        } else {
+            if (!inst->split2ndAccess) {                
+                cache_req->reqData = inst->splitMemData;
+            } else {
+                cache_req->reqData = inst->split2ndDataPtr;                
+            }
+            
+            doCacheAccess(inst, NULL, cache_req);            
+        }        
      }
  
      return cache_req->fault;
@@ -330,16 +525,93 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
              uint64_t *write_res)
  {
      CacheReqPtr cache_req = dynamic_cast<CacheReqPtr>(findRequest(inst));
-    assert(cache_req);
-
-    int acc_size =  sizeof(T);
-    doTLBAccess(inst, cache_req, acc_size, flags, TheISA::TLB::Write);
+    assert(cache_req && "Can't Find Instruction for Write!");
+
+    // The block size of our peer
+    unsigned blockSize = this->cachePort->peerBlockSize();
+
+    //The size of the data we're trying to write.
+    int dataSize = sizeof(T);
+
+    if (inst->split2ndAccess) {     
+        dataSize = inst->split2ndSize;
+        cache_req->splitAccess = true;        
+        cache_req->split2ndAccess = true;
+        
+        DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (2 of 2) for (%#x, %#x).\n", inst->seqNum, 
+                inst->getMemAddr(), inst->split2ndAddr);       
+    }  
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);
+
+    if (secondAddr > addr && !inst->split2ndAccess) {
+            
+        DPRINTF(InOrderCachePort, "[sn:%i] Split Write Access (1 of 2) for (%#x, %#x).\n", inst->seqNum, 
+                addr, secondAddr);       
+
+        // Save All "Total" Split Information
+        // ==============================
+        inst->splitInst = true;        
+        inst->splitTotalSize = dataSize;
+
+        if (!inst->splitInstSked) {
+            // Schedule Split Write/Complete for Instruction
+            // ==============================
+            int stage_num = cache_req->getStageNum();
+        
+            int stage_pri = ThePipeline::getNextPriority(inst, stage_num);
+        
+            inst->resSched.push(new ScheduleEntry(stage_num, 
+                                                  stage_pri, 
+                                                  cpu->resPool->getResIdx(DCache),
+                                                  CacheUnit::InitSecondSplitWrite,
+                                                  1)
+                );
+
+            inst->resSched.push(new ScheduleEntry(stage_num + 1, 
+                                                  1/*stage_pri*/, 
+                                                  cpu->resPool->getResIdx(DCache),
+                                                  CacheUnit::CompleteSecondSplitWrite, 
+                                                  1)
+                );
+            inst->splitInstSked = true;
+        } else {
+            DPRINTF(InOrderCachePort, "[tid:%i] sn:%i] Retrying Split Read Access (1 of 2) for (%#x, %#x).\n", 
+                    inst->readTid(), inst->seqNum, addr, secondAddr);                   
+        }
+        
+        
+
+        // Split Information for First Access
+        // ==============================
+        dataSize = secondAddr - addr;
+        cache_req->splitAccess = true;
+
+        // Split Information for Second Access
+        // ==============================
+        inst->split2ndSize = addr + sizeof(T) - secondAddr;
+        inst->split2ndAddr = secondAddr;            
+        inst->split2ndStoreDataPtr = &cache_req->inst->storeData;
+        inst->split2ndStoreDataPtr += dataSize;            
+        inst->split2ndFlags = flags;        
+        inst->splitInstSked = true;
+    }    
+        
+    doTLBAccess(inst, cache_req, dataSize, flags, TheISA::TLB::Write);
  
      if (cache_req->fault == NoFault) {
-        cache_req->reqData = new uint8_t[acc_size];
-        doCacheAccess(inst, write_res);
+        if (!cache_req->splitAccess) {            
+            // Remove this line since storeData is saved in INST?
+            cache_req->reqData = new uint8_t[dataSize];
+            doCacheAccess(inst, write_res);
+        } else {            
+            doCacheAccess(inst, write_res, cache_req);            
+        }        
+        
      }
-
+    
      return cache_req->fault;
  }
  
@@ -347,8 +619,8 @@ CacheUnit::write(DynInstPtr inst, T data, Addr addr, unsigned flags,
  void
  CacheUnit::execute(int slot_num)
  {
-    if (cacheBlocked) {
-        DPRINTF(InOrderCachePort, "Cache Blocked. Cannot Access\n");
+    if (cachePortBlocked) {
+        DPRINTF(InOrderCachePort, "Cache Port Blocked. Cannot Access\n");
          return;
      }
  
@@ -359,6 +631,8 @@ CacheUnit::execute(int slot_num)
  #if TRACING_ON
      ThreadID tid = inst->readTid();
      int seq_num = inst->seqNum;
+    std::string acc_type = "write";
+    
  #endif
  
      cache_req->fault = NoFault;
@@ -390,10 +664,14 @@ CacheUnit::execute(int slot_num)
          }
  
        case InitiateReadData:
+#if TRACING_ON
+        acc_type = "read";
+#endif        
        case InitiateWriteData:
+            
          DPRINTF(InOrderCachePort,
-                "[tid:%u]: Initiating data access to %s for addr. %08p\n",
-                tid, name(), cache_req->inst->getMemAddr());
+                "[tid:%u]: [sn:%i] Initiating data %s access to %s for addr. %08p\n",
+                tid, inst->seqNum, acc_type, name(), cache_req->inst->getMemAddr());
  
          inst->setCurResSlot(slot_num);
  
@@ -402,9 +680,29 @@ CacheUnit::execute(int slot_num)
          } else {
              inst->initiateAcc();
          }
+        
+        break;
+
+      case InitSecondSplitRead:
+        DPRINTF(InOrderCachePort,
+                "[tid:%u]: [sn:%i] Initiating split data read access to %s for addr. %08p\n",
+                tid, inst->seqNum, name(), cache_req->inst->split2ndAddr);
+        inst->split2ndAccess = true;
+        assert(inst->split2ndAddr != 0);
+        read(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags);        
+        break;
+
+      case InitSecondSplitWrite:
+        DPRINTF(InOrderCachePort,
+                "[tid:%u]: [sn:%i] Initiating split data write access to %s for addr. %08p\n",
+                tid, inst->seqNum, name(), cache_req->inst->getMemAddr());
  
+        inst->split2ndAccess = true;
+        assert(inst->split2ndAddr != 0);
+        write(inst, inst->split2ndAddr, inst->split2ndData, inst->split2ndFlags, NULL);        
          break;
  
+
        case CompleteFetch:
          if (cache_req->isMemAccComplete()) {
              DPRINTF(InOrderCachePort,
@@ -415,16 +713,24 @@ CacheUnit::execute(int slot_num)
              DPRINTF(InOrderCachePort, "[tid:%i]: Instruction [sn:%i] is: %s\n",
                      tid, seq_num, inst->staticInst->disassemble(inst->PC));
  
+            removeAddrDependency(inst);
+            
              delete cache_req->dataPkt;
+            
+            // Do not stall and switch threads for fetch... for now..
+            // TODO: We need to detect cache misses for latencies > 1
+            // cache_req->setMemStall(false);            
+            
              cache_req->done();
          } else {
              DPRINTF(InOrderCachePort,
-                    "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n",
+                     "[tid:%i]: [sn:%i]: Unable to Complete Fetch Access\n",
                      tid, inst->seqNum);
              DPRINTF(InOrderStall,
                      "STALL: [tid:%i]: Fetch miss from %08p\n",
                      tid, cache_req->inst->readPC());
              cache_req->setCompleted(false);
+            //cache_req->setMemStall(true);            
          }
          break;
  
@@ -437,14 +743,55 @@ CacheUnit::execute(int slot_num)
          if (cache_req->isMemAccComplete() ||
              inst->isDataPrefetch() ||
              inst->isInstPrefetch()) {
+            removeAddrDependency(inst);
+            cache_req->setMemStall(false);            
              cache_req->done();
          } else {
              DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
                      tid, cache_req->inst->getMemAddr());
              cache_req->setCompleted(false);
+            cache_req->setMemStall(true);            
          }
          break;
  
+      case CompleteSecondSplitRead:
+        DPRINTF(InOrderCachePort,
+                "[tid:%i]: [sn:%i]: Trying to Complete Split Data Read Access\n",
+                tid, inst->seqNum);
+
+        if (cache_req->isMemAccComplete() ||
+            inst->isDataPrefetch() ||
+            inst->isInstPrefetch()) {
+            removeAddrDependency(inst);
+            cache_req->setMemStall(false);            
+            cache_req->done();
+        } else {
+            DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
+                    tid, cache_req->inst->split2ndAddr);
+            cache_req->setCompleted(false);
+            cache_req->setMemStall(true);            
+        }
+        break;
+
+      case CompleteSecondSplitWrite:
+        DPRINTF(InOrderCachePort,
+                "[tid:%i]: [sn:%i]: Trying to Complete Split Data Write Access\n",
+                tid, inst->seqNum);
+
+        if (cache_req->isMemAccComplete() ||
+            inst->isDataPrefetch() ||
+            inst->isInstPrefetch()) {
+            removeAddrDependency(inst);
+            cache_req->setMemStall(false);            
+            cache_req->done();
+        } else {
+            DPRINTF(InOrderStall, "STALL: [tid:%i]: Data miss from %08p\n",
+                    tid, cache_req->inst->split2ndAddr);
+            cache_req->setCompleted(false);
+            cache_req->setMemStall(true);            
+        }
+        break;
+        
        default:
          fatal("Unrecognized command to %s", resName);
      }
@@ -462,8 +809,7 @@ CacheUnit::prefetch(DynInstPtr inst)
      // Clean-Up cache resource request so
      // other memory insts. can use them
      cache_req->setCompleted();
-    cacheStatus = cacheAccessComplete;
-    cacheBlocked = false;
+    cachePortBlocked = false;
      cache_req->setMemAccPending(false);
      cache_req->setMemAccCompleted();
      inst->unsetMemAddr();
@@ -482,8 +828,7 @@ CacheUnit::writeHint(DynInstPtr inst)
      // Clean-Up cache resource request so
      // other memory insts. can use them
      cache_req->setCompleted();
-    cacheStatus = cacheAccessComplete;
-    cacheBlocked = false;
+    cachePortBlocked = false;
      cache_req->setMemAccPending(false);
      cache_req->setMemAccCompleted();
      inst->unsetMemAddr();
@@ -491,15 +836,21 @@ CacheUnit::writeHint(DynInstPtr inst)
  
  // @TODO: Split into doCacheRead() and doCacheWrite()
  Fault
-CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
+CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res, CacheReqPtr split_req)
  {
      Fault fault = NoFault;
  #if TRACING_ON
      ThreadID tid = inst->readTid();
  #endif
  
-    CacheReqPtr cache_req
-        = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
+    CacheReqPtr cache_req;
+    
+    if (split_req == NULL) {        
+        cache_req = dynamic_cast<CacheReqPtr>(reqMap[inst->getCurResSlot()]);
+    } else{
+        cache_req = split_req;
+    }        
+
      assert(cache_req);
  
      // Check for LL/SC and if so change command
@@ -510,25 +861,28 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
      if (cache_req->pktCmd == MemCmd::WriteReq) {
          cache_req->pktCmd =
              cache_req->memReq->isSwap() ? MemCmd::SwapReq :
-            (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq : MemCmd::WriteReq);
+            (cache_req->memReq->isLLSC() ? MemCmd::StoreCondReq 
+             : MemCmd::WriteReq);
      }
  
      cache_req->dataPkt = new CacheReqPacket(cache_req, cache_req->pktCmd,
-                                            Packet::Broadcast);
+                                            Packet::Broadcast, cache_req->instIdx);
  
      if (cache_req->dataPkt->isRead()) {
          cache_req->dataPkt->dataStatic(cache_req->reqData);
-    } else if (cache_req->dataPkt->isWrite()) {
-        cache_req->dataPkt->dataStatic(&cache_req->inst->storeData);
-
+    } else if (cache_req->dataPkt->isWrite()) {        
+        if (inst->split2ndAccess) {            
+            cache_req->dataPkt->dataStatic(inst->split2ndStoreDataPtr);
+        } else {
+            cache_req->dataPkt->dataStatic(&cache_req->inst->storeData);            
+        }
+        
          if (cache_req->memReq->isCondSwap()) {
              assert(write_res);
              cache_req->memReq->setExtraData(*write_res);
          }
      }
  
-    cache_req->dataPkt->time = curTick;
-
      bool do_access = true;  // flag to suppress cache access
  
      Request *memReq = cache_req->dataPkt->req;
@@ -546,28 +900,18 @@ CacheUnit::doCacheAccess(DynInstPtr inst, uint64_t *write_res)
      if (do_access) {
          if (!cachePort->sendTiming(cache_req->dataPkt)) {
              DPRINTF(InOrderCachePort,
-                    "[tid:%i] [sn:%i] is waiting to retry request\n",
-                    tid, inst->seqNum);
-
-            retrySlot = cache_req->getSlot();
-            retryReq = cache_req;
-            retryPkt = cache_req->dataPkt;
-
-            cacheStatus = cacheWaitRetry;
-
-            //cacheBlocked = true;
-
-            DPRINTF(InOrderStall, "STALL: \n");
-
+                    "[tid:%i] [sn:%i] cannot access cache, because port "
+                    "is blocked. now waiting to retry request\n", tid, 
+                    inst->seqNum);
              cache_req->setCompleted(false);
+            cachePortBlocked = true;
          } else {
              DPRINTF(InOrderCachePort,
                      "[tid:%i] [sn:%i] is now waiting for cache response\n",
                      tid, inst->seqNum);
              cache_req->setCompleted();
              cache_req->setMemAccPending();
-            cacheStatus = cacheWaitResponse;
-            cacheBlocked = false;
+            cachePortBlocked = false;
          }
      } else if (!do_access && memReq->isLLSC()){
          // Store-Conditional instructions complete even if they "failed"
@@ -594,6 +938,7 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
  {
      // Cast to correct packet type
      CacheReqPacket* cache_pkt = dynamic_cast<CacheReqPacket*>(pkt);
+             
      assert(cache_pkt);
  
      if (cache_pkt->cacheReq->isSquashed()) {
@@ -601,9 +946,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
                  "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n",
                  cache_pkt->cacheReq->getInst()->readTid(),
                  cache_pkt->cacheReq->getInst()->seqNum);
+        DPRINTF(RefCount,
+                "Ignoring completion of squashed access, [tid:%i] [sn:%i]\n",
+                cache_pkt->cacheReq->getTid(),
+                cache_pkt->cacheReq->seqNum);
  
          cache_pkt->cacheReq->done();
          delete cache_pkt;
+
+        cpu->wakeCPU();
+
          return;
      }
  
@@ -615,7 +967,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
  
      // Cast to correct request type
      CacheRequest *cache_req = dynamic_cast<CacheReqPtr>(
-        findRequest(cache_pkt->cacheReq->getInst()));
+        findSplitRequest(cache_pkt->cacheReq->getInst(), cache_pkt->instIdx));
+
+    if (!cache_req) {
+        warn(
+                "[tid:%u]: [sn:%i]: Can't find slot for cache access to addr. %08p\n",
+                cache_pkt->cacheReq->getInst()->readTid(),
+                cache_pkt->cacheReq->getInst()->seqNum,
+                cache_pkt->cacheReq->getInst()->getMemAddr());
+    }
+    
      assert(cache_req);
  
  
@@ -641,8 +1002,9 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
              ExtMachInst ext_inst;
              StaticInstPtr staticInst = NULL;
              Addr inst_pc = inst->readPC();
-            MachInst mach_inst = TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
-                                (cache_pkt->getPtr<uint8_t>()));
+            MachInst mach_inst = 
+                TheISA::gtoh(*reinterpret_cast<TheISA::MachInst *>
+                             (cache_pkt->getPtr<uint8_t>()));
  
              predecoder.setTC(cpu->thread[tid]->getTC());
              predecoder.moreBytes(inst_pc, inst_pc, mach_inst);
@@ -660,9 +1022,33 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
              DPRINTF(InOrderCachePort,
                      "[tid:%u]: [sn:%i]: Processing cache access\n",
                      tid, inst->seqNum);
-
-            inst->completeAcc(pkt);
-
+            
+            if (inst->splitInst) {
+                inst->splitFinishCnt++;
+                
+                if (inst->splitFinishCnt == 2) {
+                    cache_req->memReq->setVirt(0/*inst->tid*/, 
+                                               inst->getMemAddr(),
+                                               inst->splitTotalSize,
+                                               0,
+                                               0);
+                    
+                    Packet split_pkt(cache_req->memReq, cache_req->pktCmd,
+                                     Packet::Broadcast);                    
+
+
+                    if (inst->isLoad()) {                        
+                        split_pkt.dataStatic(inst->splitMemData);
+                    } else  {                            
+                        split_pkt.dataStatic(&inst->storeData);                        
+                    }
+                    
+                    inst->completeAcc(&split_pkt);
+                }                
+            } else {                            
+                inst->completeAcc(pkt);
+            }
+            
              if (inst->isLoad()) {
                  assert(cache_pkt->isRead());
  
@@ -696,6 +1082,16 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
          cache_req->setMemAccPending(false);
          cache_req->setMemAccCompleted();
  
+        if (cache_req->isMemStall() && 
+            cpu->threadModel == InOrderCPU::SwitchOnCacheMiss) {    
+            DPRINTF(InOrderCachePort, "[tid:%u] Waking up from Cache Miss.\n", tid);
+            
+            cpu->activateContext(tid);            
+            
+            DPRINTF(ThreadModel, "Activating [tid:%i] after return from cache"
+                    "miss.\n", tid);            
+        }
+        
          // Wake up the CPU (if it went to sleep and was waiting on this
          // completion event).
          cpu->wakeCPU();
@@ -717,22 +1113,14 @@ CacheUnit::processCacheCompletion(PacketPtr pkt)
  void
  CacheUnit::recvRetry()
  {
-    DPRINTF(InOrderCachePort, "Retrying Request for [tid:%i] [sn:%i]\n",
-            retryReq->inst->readTid(), retryReq->inst->seqNum);
+    DPRINTF(InOrderCachePort, "Unblocking Cache Port. \n");
+    
+    assert(cachePortBlocked);
  
-    assert(retryPkt != NULL);
-    assert(cacheBlocked);
-    assert(cacheStatus == cacheWaitRetry);
+    // Clear the cache port for use again
+    cachePortBlocked = false;
  
-    if (cachePort->sendTiming(retryPkt)) {
-        cacheStatus = cacheWaitResponse;
-        retryPkt = NULL;
-        cacheBlocked = false;
-    } else {
-        DPRINTF(InOrderCachePort,
-                "Retry Request for [tid:%i] [sn:%i] failed\n",
-                retryReq->inst->readTid(), retryReq->inst->seqNum);
-    }
+    cpu->wakeCPU();
  }
  
  CacheUnitEvent::CacheUnitEvent()
@@ -755,7 +1143,8 @@ CacheUnitEvent::process()
  
      tlb_res->tlbBlocked[tid] = false;
  
-    tlb_res->cpu->pipelineStage[stage_num]->unsetResStall(tlb_res->reqMap[slotIdx], tid);
+    tlb_res->cpu->pipelineStage[stage_num]->
+        unsetResStall(tlb_res->reqMap[slotIdx], tid);
  
      req_ptr->tlbStall = false;
  
@@ -764,6 +1153,26 @@ CacheUnitEvent::process()
      }
  }
  
+void
+CacheUnit::squashDueToMemStall(DynInstPtr inst, int stage_num,
+                               InstSeqNum squash_seq_num, ThreadID tid)
+{
+    // If squashing due to memory stall, then we do NOT want to 
+    // squash the instruction that caused the stall so we
+    // increment the sequence number here to prevent that.
+    //
+    // NOTE: This is only for the SwitchOnCacheMiss Model
+    // NOTE: If you have multiple outstanding misses from the same
+    //       thread then you need to reevaluate this code
+    // NOTE: squash should originate from 
+    //       pipeline_stage.cc:processInstSchedule
+    DPRINTF(InOrderCachePort, "Squashing above [sn:%u]\n", 
+            squash_seq_num + 1);
+    
+    squash(inst, stage_num, squash_seq_num + 1, tid);    
+}
+
+
  void
  CacheUnit::squash(DynInstPtr inst, int stage_num,
                    InstSeqNum squash_seq_num, ThreadID tid)
@@ -784,6 +1193,14 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
                      "[tid:%i] Squashing request from [sn:%i]\n",
                      req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
  
+            if (req_ptr->isSquashed()) {
+                DPRINTF(AddrDep, "Request for [tid:%i] [sn:%i] already squashed, ignoring squash process.\n",
+                        req_ptr->getInst()->readTid(),
+                        req_ptr->getInst()->seqNum);
+                map_it++;                
+                continue;                
+            }
+            
              req_ptr->setSquashed();
  
              req_ptr->getInst()->setSquashed();
@@ -798,7 +1215,8 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
  
                  int stall_stage = reqMap[req_slot_num]->getStageNum();
  
-                cpu->pipelineStage[stall_stage]->unsetResStall(reqMap[req_slot_num], tid);
+                cpu->pipelineStage[stall_stage]->
+                    unsetResStall(reqMap[req_slot_num], tid);
              }
  
              if (!cache_req->tlbStall && !cache_req->isMemAccPending()) {
@@ -807,7 +1225,29 @@ CacheUnit::squash(DynInstPtr inst, int stage_num,
  
                  // Mark slot for removal from resource
                  slot_remove_list.push_back(req_ptr->getSlot());
+
+                DPRINTF(InOrderCachePort,
+                        "[tid:%i] Squashing request from [sn:%i]\n",
+                        req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
+            } else {
+                DPRINTF(InOrderCachePort,
+                        "[tid:%i] Request from [sn:%i] squashed, but still pending completion.\n",
+                        req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum);
+                DPRINTF(RefCount,
+                        "[tid:%i] Request from [sn:%i] squashed (split:%i), but still pending completion.\n",
+                        req_ptr->getInst()->readTid(), req_ptr->getInst()->seqNum,
+                        req_ptr->getInst()->splitInst);
              }
+
+            if (req_ptr->getInst()->validMemAddr()) {                    
+                DPRINTF(AddrDep, "Squash of [tid:%i] [sn:%i], attempting to remove addr. %08p dependencies.\n",
+                        req_ptr->getInst()->readTid(),
+                        req_ptr->getInst()->seqNum, 
+                        req_ptr->getInst()->getMemAddr());
+                
+                removeAddrDependency(req_ptr->getInst());
+            }
+
          }
  
          map_it++;
@@ -927,14 +1367,16 @@ CacheUnit::write(DynInstPtr inst, uint8_t data, Addr addr,
  
  template<>
  Fault
-CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, uint64_t *res)
+CacheUnit::write(DynInstPtr inst, double data, Addr addr, unsigned flags, 
+                 uint64_t *res)
  {
      return write(inst, *(uint64_t*)&data, addr, flags, res);
  }
  
  template<>
  Fault
-CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_t *res)
+CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, 
+                 uint64_t *res)
  {
      return write(inst, *(uint32_t*)&data, addr, flags, res);
  }
@@ -942,7 +1384,9 @@ CacheUnit::write(DynInstPtr inst, float data, Addr addr, unsigned flags, uint64_
  
  template<>
  Fault
-CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, uint64_t *res)
+CacheUnit::write(DynInstPtr inst, int32_t data, Addr addr, unsigned flags, 
+                 uint64_t *res)
  {
      return write(inst, (uint32_t)data, addr, flags, res);
  }
+
diff --git a/src/cpu/inorder/resources/cache_unit.hh b/src/cpu/inorder/resources/cache_unit.hh

index c467e977188f7d69ba9a708072a7fb1b074ab57c..9004f3b93102e24621f00458df5606d2ca037b0b 100644 (file)
--- a/src/cpu/inorder/resources/cache_unit.hh
+++ b/src/cpu/inorder/resources/cache_unit.hh
@@ -62,7 +62,6 @@ class CacheUnit : public Resource
    public:
      CacheUnit(std::string res_name, int res_id, int res_width,
                int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
-    virtual ~CacheUnit() {}
  
      enum Command {
          InitiateFetch,
@@ -73,7 +72,11 @@ class CacheUnit : public Resource
          CompleteWriteData,
          Fetch,
          ReadData,
-        WriteData
+        WriteData,
+        InitSecondSplitRead,
+        InitSecondSplitWrite,
+        CompleteSecondSplitRead,
+        CompleteSecondSplitWrite
      };
  
    public:
@@ -119,24 +122,19 @@ class CacheUnit : public Resource
          virtual void recvRetry();
      };
  
-    enum CachePortStatus {
-        cacheWaitResponse,
-        cacheWaitRetry,
-        cacheAccessComplete
-    };
-
      void init();
  
      virtual ResourceRequest* getRequest(DynInstPtr _inst, int stage_num,
                                          int res_idx, int slot_num,
                                          unsigned cmd);
  
+    ResReqPtr findRequest(DynInstPtr inst);
+    ResReqPtr findSplitRequest(DynInstPtr inst, int idx);
+
      void requestAgain(DynInstPtr inst, bool &try_request);
  
      int getSlot(DynInstPtr inst);
  
-    void freeSlot(int slot_num);
-
      /** Execute the function of this resource. The Default is action
       *  is to do nothing. More specific models will derive from this
       *  class and define their own execute function.
@@ -146,6 +144,9 @@ class CacheUnit : public Resource
      void squash(DynInstPtr inst, int stage_num,
                  InstSeqNum squash_seq_num, ThreadID tid);
  
+    void squashDueToMemStall(DynInstPtr inst, int stage_num,
+                             InstSeqNum squash_seq_num, ThreadID tid);
+
      /** Processes cache completion event. */
      void processCacheCompletion(PacketPtr pkt);
  
@@ -159,7 +160,7 @@ class CacheUnit : public Resource
  
      /** Returns a specific port. */
      Port *getPort(const std::string &if_name, int idx);
-
+    
      template <class T>
      Fault read(DynInstPtr inst, Addr addr, T &data, unsigned flags);
  
@@ -173,7 +174,7 @@ class CacheUnit : public Resource
      /** Read/Write on behalf of an instruction.
       *  curResSlot needs to be a valid value in instruction.
       */
-    Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL);
+    Fault doCacheAccess(DynInstPtr inst, uint64_t *write_result=NULL, CacheReqPtr split_req=NULL);
  
      void prefetch(DynInstPtr inst);
  
@@ -181,23 +182,18 @@ class CacheUnit : public Resource
  
      uint64_t getMemData(Packet *packet);
  
+    void setAddrDependency(DynInstPtr inst);
+    void removeAddrDependency(DynInstPtr inst);
+    
    protected:
      /** Cache interface. */
      CachePort *cachePort;
  
-    CachePortStatus cacheStatus;
-
-    CacheReqPtr retryReq;
+    bool cachePortBlocked;
  
-    PacketPtr retryPkt;
-
-    int retrySlot;
-
-    bool cacheBlocked;
-
-    std::vector<Addr> addrList;
+    std::vector<Addr> addrList[ThePipeline::MaxThreads];
  
-    std::map<Addr, InstSeqNum> addrMap;
+    std::map<Addr, InstSeqNum> addrMap[ThePipeline::MaxThreads];
  
    public:
      int cacheBlkSize;
@@ -249,17 +245,18 @@ class CacheRequest : public ResourceRequest
    public:
      CacheRequest(CacheUnit *cres, DynInstPtr inst, int stage_num, int res_idx,
                   int slot_num, unsigned cmd, int req_size,
-                 MemCmd::Command pkt_cmd, unsigned flags, int cpu_id)
+                 MemCmd::Command pkt_cmd, unsigned flags, int cpu_id, int idx)
          : ResourceRequest(cres, inst, stage_num, res_idx, slot_num, cmd),
            pktCmd(pkt_cmd), memReq(NULL), reqData(NULL), dataPkt(NULL),
            retryPkt(NULL), memAccComplete(false), memAccPending(false),
-          tlbStall(false)
+          tlbStall(false), splitAccess(false), splitAccessNum(-1),
+          split2ndAccess(false), instIdx(idx)
      { }
  
  
      virtual ~CacheRequest()
      {
-        if (reqData) {
+        if (reqData && !splitAccess) {
              delete [] reqData;
          }
      }
@@ -273,6 +270,11 @@ class CacheRequest : public ResourceRequest
          memAccComplete = completed;
      }
  
+    bool is2ndSplit() 
+    {
+        return split2ndAccess;
+    }
+    
      bool isMemAccComplete() { return memAccComplete; }
  
      void setMemAccPending(bool pending = true) { memAccPending = pending; }
@@ -288,19 +290,27 @@ class CacheRequest : public ResourceRequest
      bool memAccComplete;
      bool memAccPending;
      bool tlbStall;
+
+    bool splitAccess;
+    int splitAccessNum;
+    bool split2ndAccess;
+    int instIdx;    
+    
  };
  
  class CacheReqPacket : public Packet
  {
    public:
      CacheReqPacket(CacheRequest *_req,
-                   Command _cmd, short _dest)
-        : Packet(_req->memReq, _cmd, _dest), cacheReq(_req)
+                   Command _cmd, short _dest, int _idx = 0)
+        : Packet(_req->memReq, _cmd, _dest), cacheReq(_req), instIdx(_idx)
      {
  
      }
  
      CacheRequest *cacheReq;
+    int instIdx;
+    
  };
  
  #endif //__CPU_CACHE_UNIT_HH__
diff --git a/src/cpu/inorder/resources/execution_unit.cc b/src/cpu/inorder/resources/execution_unit.cc

index 6c44e2456acf9d288fbc173468c87652edcb5307..4292912315a83b0ef5a3110c0a30722b24382882 100644 (file)
--- a/src/cpu/inorder/resources/execution_unit.cc
+++ b/src/cpu/inorder/resources/execution_unit.cc
@@ -54,6 +54,17 @@ ExecutionUnit::regStats()
          .name(name() + ".predictedNotTakenIncorrect")
          .desc("Number of Branches Incorrectly Predicted As Not Taken).");
  
+    lastExecuteCycle = curTick;
+
+    cyclesExecuted
+        .name(name() + ".cyclesExecuted")
+        .desc("Number of Cycles Execution Unit was used.");
+
+    utilization
+        .name(name() + ".utilization")
+        .desc("Utilization of Execution Unit (cycles / totalCycles).");
+    utilization = cyclesExecuted / cpu->numCycles;
+
      Resource::regStats();
  }
  
@@ -75,6 +86,12 @@ ExecutionUnit::execute(int slot_num)
      {
        case ExecuteInst:
          {
+            if (curTick != lastExecuteCycle) {
+                lastExecuteCycle = curTick;
+                cyclesExecuted++;
+            }
+
+
              if (inst->isMemRef()) {
                  panic("%s not configured to handle memory ops.\n", resName);
              } else if (inst->isControl()) {
diff --git a/src/cpu/inorder/resources/execution_unit.hh b/src/cpu/inorder/resources/execution_unit.hh

index 46691bbf25e13c10a758953087353807409fa595..b9cf1d4281c081c39a2634026e93a1425fc56fb2 100644 (file)
--- a/src/cpu/inorder/resources/execution_unit.hh
+++ b/src/cpu/inorder/resources/execution_unit.hh
@@ -52,7 +52,6 @@ class ExecutionUnit : public Resource {
    public:
      ExecutionUnit(std::string res_name, int res_id, int res_width,
                int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
-    virtual ~ExecutionUnit() {}
  
    public:
      virtual void regStats();
@@ -71,6 +70,11 @@ class ExecutionUnit : public Resource {
      /////////////////////////////////////////////////////////////////
      Stats::Scalar predictedTakenIncorrect;
      Stats::Scalar predictedNotTakenIncorrect;
+
+    Stats::Scalar cyclesExecuted;
+    Tick lastExecuteCycle;
+
+    Stats::Formula utilization;
  };
  
  
diff --git a/src/cpu/inorder/resources/fetch_seq_unit.cc b/src/cpu/inorder/resources/fetch_seq_unit.cc

index 1d0b9207523bddc1ae5f47575db5152f0f277eb9..03663881c1a3f8bd182db9f34e750995b6ac0326 100644 (file)
--- a/src/cpu/inorder/resources/fetch_seq_unit.cc
+++ b/src/cpu/inorder/resources/fetch_seq_unit.cc
@@ -54,6 +54,11 @@ FetchSeqUnit::FetchSeqUnit(std::string res_name, int res_id, int res_width,
      }
  }
  
+FetchSeqUnit::~FetchSeqUnit()
+{
+    delete [] resourceEvent;
+}
+
  void
  FetchSeqUnit::init()
  {
@@ -336,3 +341,35 @@ FetchSeqUnit::deactivateThread(ThreadID tid)
      if (thread_it != cpu->fetchPriorityList.end())
          cpu->fetchPriorityList.erase(thread_it);
  }
+
+void
+FetchSeqUnit::suspendThread(ThreadID tid)
+{
+    deactivateThread(tid);    
+}
+
+void
+FetchSeqUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
+{
+    pcValid[tid] = true;
+
+    if (cpu->thread[tid]->lastGradIsBranch) {
+        /** This function assumes that the instruction causing the context
+         *  switch was right after the branch. Thus, if it's not, then
+         *  we are updating incorrectly here
+         */
+        assert(cpu->thread[tid]->lastBranchNextPC == inst->readPC());
+        
+        PC[tid] = cpu->thread[tid]->lastBranchNextNPC;
+        nextPC[tid] = PC[tid] + instSize;
+        nextNPC[tid] = nextPC[tid] + instSize;
+    } else {
+        PC[tid] = inst->readNextPC();
+        nextPC[tid] = inst->readNextNPC();
+        nextNPC[tid] = inst->readNextNPC() + instSize;        
+    }
+    
+    DPRINTF(InOrderFetchSeq, "[tid:%i]: Updating PCs due to Context Switch."
+            "Assigning  PC:%08p NPC:%08p NNPC:%08p.\n", tid, PC[tid], 
+            nextPC[tid], nextNPC[tid]);
+}
diff --git a/src/cpu/inorder/resources/fetch_seq_unit.hh b/src/cpu/inorder/resources/fetch_seq_unit.hh

index a4495564b6a4e333df637eae917b663f8f6e1055..289e150aa166d3c5c87a8b5d17d5d1e435d049e0 100644 (file)
--- a/src/cpu/inorder/resources/fetch_seq_unit.hh
+++ b/src/cpu/inorder/resources/fetch_seq_unit.hh
@@ -54,12 +54,15 @@ class FetchSeqUnit : public Resource {
    public:
      FetchSeqUnit(std::string res_name, int res_id, int res_width,
                int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
-    virtual ~FetchSeqUnit() {}
-
+    virtual ~FetchSeqUnit();
+    
      virtual void init();
      virtual void activateThread(ThreadID tid);
      virtual void deactivateThread(ThreadID tid);
+    virtual void suspendThread(ThreadID tid);
      virtual void execute(int slot_num);
+    void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid);
+    
  
      /** Override default Resource squash sequence. This actually,
       *  looks in the global communication buffer to get squash
diff --git a/src/cpu/inorder/resources/graduation_unit.cc b/src/cpu/inorder/resources/graduation_unit.cc

index 2d7cd5c8c7f28746f4706f4acfe770e2a365adf6..2dad9889abdf9d0f2b3d9542a59bb2ab4b94901a 100644 (file)
--- a/src/cpu/inorder/resources/graduation_unit.cc
+++ b/src/cpu/inorder/resources/graduation_unit.cc
@@ -79,8 +79,6 @@ GraduationUnit::execute(int slot_num)
                      "[tid:%i] Graduating instruction [sn:%i].\n",
                      tid, inst->seqNum);
  
-            DPRINTF(RefCount, "Refcount = %i.\n", 0/*inst->curCount()*/);
-
              // Release Non-Speculative "Block" on instructions that could not execute
              // because there was a non-speculative inst. active.
              // @TODO: Fix this functionality. Probably too conservative.
diff --git a/src/cpu/inorder/resources/graduation_unit.hh b/src/cpu/inorder/resources/graduation_unit.hh

index ad222b11943b0f826388e1bc029479d35ca5db4f..7f0db98d029e3ac59e3d20ce165608e4e6fb66a9 100644 (file)
--- a/src/cpu/inorder/resources/graduation_unit.hh
+++ b/src/cpu/inorder/resources/graduation_unit.hh
@@ -63,8 +63,6 @@ class GraduationUnit : public Resource {
      bool *nonSpecInstActive[ThePipeline::MaxThreads];
  
      InstSeqNum *nonSpecSeqNum[ThePipeline::MaxThreads];
-
-    /** @todo: Add Resource Stats Here */
  };
  
  #endif //__CPU_INORDER_GRAD_UNIT_HH__
diff --git a/src/cpu/inorder/resources/inst_buffer.cc b/src/cpu/inorder/resources/inst_buffer.cc

index bb308b0ea2f6ba9357454d2513a925f7de1dfc08..17b308db09e5800622b1cee60fb612a0dbc58f68 100644 (file)
--- a/src/cpu/inorder/resources/inst_buffer.cc
+++ b/src/cpu/inorder/resources/inst_buffer.cc
@@ -52,7 +52,8 @@ InstBuffer::regStats()
  {
      instsBypassed
          .name(name() + ".instsBypassed")
-        .desc("Number of Instructions Bypassed.");
+        .desc("Number of Instructions Bypassed.")
+        .prereq(instsBypassed);    
  
      Resource::regStats();
  }
diff --git a/src/cpu/inorder/resources/mult_div_unit.hh b/src/cpu/inorder/resources/mult_div_unit.hh

index d3dd0260d434a2f25a4f7681f71ae4b7368666c4..19688b09fd46403708cf6f6400a3574908c29838 100644 (file)
--- a/src/cpu/inorder/resources/mult_div_unit.hh
+++ b/src/cpu/inorder/resources/mult_div_unit.hh
@@ -57,7 +57,6 @@ class MultDivUnit : public Resource {
    public:
      MultDivUnit(std::string res_name, int res_id, int res_width,
                int res_latency, InOrderCPU *_cpu, ThePipeline::Params *params);
-    virtual ~MultDivUnit() {}
  
    public:
      /** Override default Resource getSlot(). Will only getSlot if
diff --git a/src/cpu/inorder/resources/use_def.cc b/src/cpu/inorder/resources/use_def.cc

index 36392d054ae9e6db926d463702064e3340ab3594..5fd6a4724c89f0e69fceafad704726bdebf1bbdd 100644 (file)
--- a/src/cpu/inorder/resources/use_def.cc
+++ b/src/cpu/inorder/resources/use_def.cc
@@ -59,6 +59,17 @@ UseDefUnit::UseDefUnit(string res_name, int res_id, int res_width,
  
  }
  
+void
+UseDefUnit::regStats()
+{
+    uniqueRegsPerSwitch
+        .name(name() + ".uniqueRegsPerSwitch")
+        .desc("Number of Unique Registers Needed Per Context Switch")
+        .prereq(uniqueRegsPerSwitch);
+    
+    Resource::regStats();
+}
+
  ResReqPtr
  UseDefUnit::getRequest(DynInstPtr inst, int stage_num, int res_idx,
                       int slot_num, unsigned cmd)
@@ -75,7 +86,8 @@ UseDefUnit::findRequest(DynInstPtr inst)
      map<int, ResReqPtr>::iterator map_end = reqMap.end();
  
      while (map_it != map_end) {
-        UseDefRequest* ud_req = dynamic_cast<UseDefRequest*>((*map_it).second);
+        UseDefRequest* ud_req = 
+            dynamic_cast<UseDefRequest*>((*map_it).second);
          assert(ud_req);
  
          if (ud_req &&
@@ -107,9 +119,9 @@ UseDefUnit::execute(int slot_idx)
      // in the pipeline then stall instructions here
      if (*nonSpecInstActive[tid] == true &&
          seq_num > *nonSpecSeqNum[tid]) {
-        DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because there is "
-                "non-speculative instruction [sn:%i] has not graduated.\n",
-                tid, seq_num, *nonSpecSeqNum[tid]);
+        DPRINTF(InOrderUseDef, "[tid:%i]: [sn:%i] cannot execute because"
+                "there is non-speculative instruction [sn:%i] has not "
+                "graduated.\n", tid, seq_num, *nonSpecSeqNum[tid]);
          return;
      } else if (inst->isNonSpeculative()) {
          *nonSpecInstActive[tid] = true;
@@ -121,91 +133,134 @@ UseDefUnit::execute(int slot_idx)
        case ReadSrcReg:
          {
              int reg_idx = inst->_srcRegIdx[ud_idx];
-
-            DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source register idx %i (reg #%i).\n",
+            
+            DPRINTF(InOrderUseDef, "[tid:%i]: Attempting to read source "
+                    "register idx %i (reg #%i).\n",
                      tid, ud_idx, reg_idx);
  
-            // Ask register dependency map if it is OK to read from Arch. Reg. File
+            // Ask register dependency map if it is OK to read from Arch. 
+            // Reg. File
              if (regDepMap[tid]->canRead(reg_idx, inst)) {
+                
+                uniqueRegMap[reg_idx] = true;
+
                  if (inst->seqNum <= outReadSeqNum[tid]) {
                      if (reg_idx < FP_Base_DepTag) {
-                        DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i from Register File:%i.\n",
-                                tid, reg_idx, cpu->readIntReg(reg_idx,inst->readTid()));
+                        DPRINTF(InOrderUseDef, "[tid:%i]: Reading Int Reg %i "
+                                "from Register File:%i.\n",
+                                tid, 
+                                reg_idx, 
+                                cpu->readIntReg(reg_idx,inst->readTid()));
                          inst->setIntSrc(ud_idx,
-                                        cpu->readIntReg(reg_idx,inst->readTid()));
+                                        cpu->readIntReg(reg_idx,
+                                                        inst->readTid()));
                      } else if (reg_idx < Ctrl_Base_DepTag) {
                          reg_idx -= FP_Base_DepTag;
-                        DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i from Register File:%x (%08f).\n",
+                        DPRINTF(InOrderUseDef, "[tid:%i]: Reading Float Reg %i"
+                                " from Register File:%x (%08f).\n",
                                  tid,
                                  reg_idx,
-                                cpu->readFloatRegBits(reg_idx, inst->readTid()),
-                                cpu->readFloatReg(reg_idx, inst->readTid()));
+                                cpu->readFloatRegBits(reg_idx, 
+                                                      inst->readTid()),
+                                cpu->readFloatReg(reg_idx, 
+                                                  inst->readTid()));
  
                          inst->setFloatSrc(ud_idx,
-                                          cpu->readFloatReg(reg_idx, inst->readTid()));
+                                          cpu->readFloatReg(reg_idx, 
+                                                            inst->readTid()));
                      } else {
                          reg_idx -= Ctrl_Base_DepTag;
-                        DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i from Register File:%i.\n",
-                                tid, reg_idx, cpu->readMiscReg(reg_idx, inst->readTid()));
+                        DPRINTF(InOrderUseDef, "[tid:%i]: Reading Misc Reg %i "
+                                "from Register File:%i.\n",
+                                tid, 
+                                reg_idx, 
+                                cpu->readMiscReg(reg_idx, 
+                                                 inst->readTid()));
                          inst->setIntSrc(ud_idx,
-                                        cpu->readMiscReg(reg_idx, inst->readTid()));
+                                        cpu->readMiscReg(reg_idx, 
+                                                         inst->readTid()));
                      }
  
                      outReadSeqNum[tid] = maxSeqNum;
  
                      ud_req->done();
                  } else {
-                    DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's"
-                            " registers yet.\n", tid, outReadSeqNum[tid]);
-                    DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to write\n",
+                    DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because "
+                            "of [sn:%i] hasnt read it's registers yet.\n", 
+                            tid, outReadSeqNum[tid]);
+                    DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for "
+                            "[sn:%i] to write\n",
                              tid, outReadSeqNum[tid]);
+                    ud_req->done(false);
                  }
  
              } else {
                  // Look for forwarding opportunities
-                DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, ud_idx, inst);
+                DynInstPtr forward_inst = regDepMap[tid]->canForward(reg_idx, 
+                                                                     ud_idx, 
+                                                                     inst);
  
                  if (forward_inst) {
  
                      if (inst->seqNum <= outReadSeqNum[tid]) {
-                        int dest_reg_idx = forward_inst->getDestIdxNum(reg_idx);
+                        int dest_reg_idx = 
+                            forward_inst->getDestIdxNum(reg_idx);
  
                          if (reg_idx < FP_Base_DepTag) {
-                            DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from "
+                            DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest."
+                                    " reg value 0x%x from "
                                      "[sn:%i] to [sn:%i] source #%i.\n",
-                                    tid, forward_inst->readIntResult(dest_reg_idx) ,
-                                    forward_inst->seqNum, inst->seqNum, ud_idx);
-                            inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx));
+                                    tid, 
+                                    forward_inst->readIntResult(dest_reg_idx),
+                                    forward_inst->seqNum, 
+                                    inst->seqNum, ud_idx);
+                            inst->setIntSrc(ud_idx, 
+                                            forward_inst->
+                                            readIntResult(dest_reg_idx));
                          } else if (reg_idx < Ctrl_Base_DepTag) {
-                            DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from "
+                            DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest."
+                                    " reg value 0x%x from "
                                      "[sn:%i] to [sn:%i] source #%i.\n",
-                                    tid, forward_inst->readFloatResult(dest_reg_idx) ,
+                                    tid, 
+                                    forward_inst->readFloatResult(dest_reg_idx),
                                      forward_inst->seqNum, inst->seqNum, ud_idx);
                              inst->setFloatSrc(ud_idx,
-                                              forward_inst->readFloatResult(dest_reg_idx));
+                                              forward_inst->
+                                              readFloatResult(dest_reg_idx));
                          } else {
-                            DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest. reg value 0x%x from "
+                            DPRINTF(InOrderUseDef, "[tid:%i]: Forwarding dest."
+                                    " reg value 0x%x from "
                                      "[sn:%i] to [sn:%i] source #%i.\n",
-                                    tid, forward_inst->readIntResult(dest_reg_idx) ,
-                                    forward_inst->seqNum, inst->seqNum, ud_idx);
-                            inst->setIntSrc(ud_idx, forward_inst->readIntResult(dest_reg_idx));
+                                    tid, 
+                                    forward_inst->readIntResult(dest_reg_idx),
+                                    forward_inst->seqNum, 
+                                    inst->seqNum, ud_idx);
+                            inst->setIntSrc(ud_idx, 
+                                            forward_inst->
+                                            readIntResult(dest_reg_idx));
                          }
  
                          outReadSeqNum[tid] = maxSeqNum;
  
                          ud_req->done();
                      } else {
-                        DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read because of [sn:%i] hasnt read it's"
+                        DPRINTF(InOrderUseDef, "[tid:%i]: Unable to read "
+                                "because of [sn:%i] hasnt read it's"
                                  " registers yet.\n", tid, outReadSeqNum[tid]);
-                        DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to forward\n",
+                        DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for "
+                                "[sn:%i] to forward\n",
                                  tid, outReadSeqNum[tid]);
+                        ud_req->done(false);
                      }
                  } else {
-                    DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i is not ready to read.\n",
+                    DPRINTF(InOrderUseDef, "[tid:%i]: Source register idx: %i"
+                            " is not ready to read.\n",
                              tid, reg_idx);
-                    DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read register (idx=%i)\n",
+                    DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to read "
+                            "register (idx=%i)\n",
                              tid, reg_idx);
                      outReadSeqNum[tid] = inst->seqNum;
+                    ud_req->done(false);
                  }
              }
          }
@@ -216,12 +271,14 @@ UseDefUnit::execute(int slot_idx)
              int reg_idx = inst->_destRegIdx[ud_idx];
  
              if (regDepMap[tid]->canWrite(reg_idx, inst)) {
-                DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i & Attempting to write to Register File.\n",
+                DPRINTF(InOrderUseDef, "[tid:%i]: Flattening register idx %i &"
+                        " Attempting to write to Register File.\n",
                          tid, reg_idx);
-
+                uniqueRegMap[reg_idx] = true;
                  if (inst->seqNum <= outReadSeqNum[tid]) {
                      if (reg_idx < FP_Base_DepTag) {
-                        DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result 0x%x to register idx %i.\n",
+                        DPRINTF(InOrderUseDef, "[tid:%i]: Writing Int. Result "
+                                "0x%x to register idx %i.\n",
                                  tid, inst->readIntResult(ud_idx), reg_idx);
  
                          // Remove Dependencies
@@ -236,33 +293,54 @@ UseDefUnit::execute(int slot_idx)
  
                          reg_idx -= FP_Base_DepTag;
  
-                        if (inst->resultType(ud_idx) == InOrderDynInst::Integer) {
-                            DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits Result 0x%x (bits:0x%x) to register idx %i.\n",
-                                    tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx);
-
-                            cpu->setFloatRegBits(reg_idx, // Check for FloatRegBits Here
+                        if (inst->resultType(ud_idx) == 
+                            InOrderDynInst::Integer) {
+                            DPRINTF(InOrderUseDef, "[tid:%i]: Writing FP-Bits "
+                                    "Result 0x%x (bits:0x%x) to register "
+                                    "idx %i.\n",
+                                    tid, 
+                                    inst->readFloatResult(ud_idx), 
+                                    inst->readIntResult(ud_idx), 
+                                    reg_idx);
+
+                            // Check for FloatRegBits Here
+                            cpu->setFloatRegBits(reg_idx, 
                                               inst->readIntResult(ud_idx),
                                               inst->readTid());
-                        } else if (inst->resultType(ud_idx) == InOrderDynInst::Float) {
-                            DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float Result 0x%x (bits:0x%x) to register idx %i.\n",
-                                    tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx);
+                        } else if (inst->resultType(ud_idx) == 
+                                   InOrderDynInst::Float) {
+                            DPRINTF(InOrderUseDef, "[tid:%i]: Writing Float "
+                                    "Result 0x%x (bits:0x%x) to register "
+                                    "idx %i.\n",
+                                    tid, inst->readFloatResult(ud_idx), 
+                                    inst->readIntResult(ud_idx), 
+                                    reg_idx);
  
                              cpu->setFloatReg(reg_idx,
                                               inst->readFloatResult(ud_idx),
                                               inst->readTid());
-                        } else if (inst->resultType(ud_idx) == InOrderDynInst::Double) {
-                            DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double Result 0x%x (bits:0x%x) to register idx %i.\n",
-                                    tid, inst->readFloatResult(ud_idx), inst->readIntResult(ud_idx), reg_idx);
-
-                            cpu->setFloatReg(reg_idx, // Check for FloatRegBits Here
+                        } else if (inst->resultType(ud_idx) == 
+                                   InOrderDynInst::Double) {
+                            DPRINTF(InOrderUseDef, "[tid:%i]: Writing Double "
+                                    "Result 0x%x (bits:0x%x) to register "
+                                    "idx %i.\n",
+                                    tid, 
+                                    inst->readFloatResult(ud_idx), 
+                                    inst->readIntResult(ud_idx), 
+                                    reg_idx);
+
+                            // Check for FloatRegBits Here
+                            cpu->setFloatReg(reg_idx, 
                                               inst->readFloatResult(ud_idx),
                                               inst->readTid());
                          } else {
-                            panic("Result Type Not Set For [sn:%i] %s.\n", inst->seqNum, inst->instName());
+                            panic("Result Type Not Set For [sn:%i] %s.\n", 
+                                  inst->seqNum, inst->instName());
                          }
  
                      } else {
-                        DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x to register idx %i.\n",
+                        DPRINTF(InOrderUseDef, "[tid:%i]: Writing Misc. 0x%x "
+                                "to register idx %i.\n",
                                  tid, inst->readIntResult(ud_idx), reg_idx);
  
                          // Remove Dependencies
@@ -279,17 +357,23 @@ UseDefUnit::execute(int slot_idx)
  
                      ud_req->done();
                  } else {
-                    DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because of [sn:%i] hasnt read it's"
+                    DPRINTF(InOrderUseDef, "[tid:%i]: Unable to write because "
+                            "of [sn:%i] hasnt read it's"
                              " registers yet.\n", tid, outReadSeqNum);
-                    DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for [sn:%i] to read\n",
+                    DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting for "
+                            "[sn:%i] to read\n",
                              tid, outReadSeqNum);
+                    ud_req->done(false);
                  }
              } else {
-                DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is not ready to write.\n",
+                DPRINTF(InOrderUseDef, "[tid:%i]: Dest. register idx: %i is "
+                        "not ready to write.\n",
                          tid, reg_idx);
-                DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write register (idx=%i)\n",
+                DPRINTF(InOrderStall, "STALL: [tid:%i]: waiting to write "
+                        "register (idx=%i)\n",
                          tid, reg_idx);
                  outWriteSeqNum[tid] = inst->seqNum;
+                ud_req->done(false);
              }
          }
          break;
@@ -323,12 +407,16 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
                      req_ptr->getInst()->readTid(),
                      req_ptr->getInst()->seqNum);
  
-            regDepMap[tid]->remove(req_ptr->getInst());
-
              int req_slot_num = req_ptr->getSlot();
  
-            if (latency > 0)
+            if (latency > 0) {                
+                assert(0);
+                
                  unscheduleEvent(req_slot_num);
+            }
+            
+            // Mark request for later removal
+            cpu->reqRemoveList.push(req_ptr);
  
              // Mark slot for removal from resource
              slot_remove_list.push_back(req_ptr->getSlot());
@@ -343,18 +431,29 @@ UseDefUnit::squash(DynInstPtr inst, int stage_num, InstSeqNum squash_seq_num,
      }
  
      if (outReadSeqNum[tid] >= squash_seq_num) {
-        DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", tid);
+        DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Read Seq Num Reset.\n", 
+                tid);
          outReadSeqNum[tid] = maxSeqNum;
      } else if (outReadSeqNum[tid] != maxSeqNum) {
-        DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read Seq Num %i\n",
+        DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Read "
+                "Seq Num %i\n",
                  tid, outReadSeqNum[tid]);
      }
  
      if (outWriteSeqNum[tid] >= squash_seq_num) {
-        DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", tid);
+        DPRINTF(InOrderUseDef, "[tid:%i]: Outstanding Write Seq Num Reset.\n", 
+                tid);
          outWriteSeqNum[tid] = maxSeqNum;
      } else if (outWriteSeqNum[tid] != maxSeqNum) {
-        DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write Seq Num %i\n",
+        DPRINTF(InOrderUseDef, "[tid:%i]: No need to reset Outstanding Write "
+                "Seq Num %i\n",
                  tid, outWriteSeqNum[tid]);
      }
  }
+
+void
+UseDefUnit::updateAfterContextSwitch(DynInstPtr inst, ThreadID tid)
+{
+    uniqueRegsPerSwitch = uniqueRegMap.size();
+    uniqueRegMap.clear();    
+}
diff --git a/src/cpu/inorder/resources/use_def.hh b/src/cpu/inorder/resources/use_def.hh

index 6c76d8ab52aaa71026f2896c8d71a7a8701f195e..41d758dd7a5ce2218a6e7a5c1261972e0e5025df 100644 (file)
--- a/src/cpu/inorder/resources/use_def.hh
+++ b/src/cpu/inorder/resources/use_def.hh
@@ -68,8 +68,12 @@ class UseDefUnit : public Resource {
      virtual void squash(DynInstPtr inst, int stage_num,
                          InstSeqNum squash_seq_num, ThreadID tid);
  
+    void updateAfterContextSwitch(DynInstPtr inst, ThreadID tid);    
+
      const InstSeqNum maxSeqNum;
  
+    void regStats();
+    
    protected:
      RegDepMap *regDepMap[ThePipeline::MaxThreads];
  
@@ -84,14 +88,18 @@ class UseDefUnit : public Resource {
  
      InstSeqNum floatRegSize[ThePipeline::MaxThreads];
  
+    Stats::Average uniqueRegsPerSwitch;
+    std::map<unsigned, bool> uniqueRegMap;    
+
    public:
      class UseDefRequest : public ResourceRequest {
        public:
          typedef ThePipeline::DynInstPtr DynInstPtr;
  
        public:
-        UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, int res_idx,
-                      int slot_num, unsigned cmd, int use_def_idx)
+        UseDefRequest(UseDefUnit *res, DynInstPtr inst, int stage_num, 
+                      int res_idx, int slot_num, unsigned cmd, 
+                      int use_def_idx)
              : ResourceRequest(res, inst, stage_num, res_idx, slot_num, cmd),
                useDefIdx(use_def_idx)
          { }
diff --git a/src/cpu/inorder/thread_context.cc b/src/cpu/inorder/thread_context.cc

index 41d16b633fa62e8644bf671eca69933a232c5188..d2f511b9daa7d59818182a3048f87e83ef2ed411 100644 (file)
--- a/src/cpu/inorder/thread_context.cc
+++ b/src/cpu/inorder/thread_context.cc
@@ -242,21 +242,21 @@ InOrderThreadContext::setRegOtherThread(int misc_reg, const MiscReg &val,
  void
  InOrderThreadContext::setPC(uint64_t val)
  {
-    DPRINTF(InOrderCPU, "Setting PC to %08p\n", val);
+    DPRINTF(InOrderCPU, "[tid:%i] Setting PC to %08p\n", thread->readTid(), val);
      cpu->setPC(val, thread->readTid());
  }
  
  void
  InOrderThreadContext::setNextPC(uint64_t val)
  {
-    DPRINTF(InOrderCPU, "Setting NPC to %08p\n", val);
+    DPRINTF(InOrderCPU, "[tid:%i] Setting NPC to %08p\n", thread->readTid(), val);
      cpu->setNextPC(val, thread->readTid());
  }
  
  void
  InOrderThreadContext::setNextNPC(uint64_t val)
  {
-    DPRINTF(InOrderCPU, "Setting NNPC to %08p\n", val);
+    DPRINTF(InOrderCPU, "[tid:%i] Setting NNPC to %08p\n", thread->readTid(), val);
      cpu->setNextNPC(val, thread->readTid());
  }
  
diff --git a/src/cpu/inorder/thread_context.hh b/src/cpu/inorder/thread_context.hh

index 820f3077fb03842b93a19e00bfee759a250b7437..6dd5f192f777e1e932f43693d575e9d99731b284 100644 (file)
--- a/src/cpu/inorder/thread_context.hh
+++ b/src/cpu/inorder/thread_context.hh
@@ -64,7 +64,6 @@ class InOrderThreadContext : public ThreadContext
      /** Pointer to the thread state that this TC corrseponds to. */
      InOrderThreadState *thread;
  
-
      /** Returns a pointer to the ITB. */
      /** @TODO: PERF: Should we bind this to a pointer in constructor? */
      TheISA::TLB *getITBPtr() { return cpu->getITBPtr(); }
diff --git a/src/cpu/inorder/thread_state.hh b/src/cpu/inorder/thread_state.hh

index 422df30aa13634f8d52bdb3181c1097f9d66674f..0a171a99f26a7470952143ce2f547da88ed1c924 100644 (file)
--- a/src/cpu/inorder/thread_state.hh
+++ b/src/cpu/inorder/thread_state.hh
@@ -79,14 +79,14 @@ class InOrderThreadState : public ThreadState {
  #if FULL_SYSTEM
      InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num)
          : ThreadState(reinterpret_cast<BaseCPU*>(_cpu), _thread_num),
-          cpu(_cpu), inSyscall(0), trapPending(0)
+          cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false)
      { }
  #else
      InOrderThreadState(InOrderCPU *_cpu, ThreadID _thread_num,
                         Process *_process)
          : ThreadState(reinterpret_cast<BaseCPU*>(_cpu), _thread_num,
                        _process),
-          cpu(_cpu), inSyscall(0), trapPending(0)
+          cpu(_cpu), inSyscall(0), trapPending(0), lastGradIsBranch(false)
      { }
  #endif
  
@@ -105,10 +105,15 @@ class InOrderThreadState : public ThreadState {
      /** Returns a pointer to the TC of this thread. */
      ThreadContext *getTC() { return tc; }
  
+    /** Return the thread id */
      int readTid() { return threadId(); }
  
-    /** Pointer to the last graduated instruction in the thread */
-    //DynInstPtr lastGradInst;
+    
+    /** Is last instruction graduated a branch? */
+    bool lastGradIsBranch;
+    Addr lastBranchPC;    
+    Addr lastBranchNextPC;    
+    Addr lastBranchNextNPC;    
  };
  
  #endif // __CPU_INORDER_THREAD_STATE_HH__
diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini

index ca54a43c1e6fee309295ec80adf35dfabe5cef5a..95f95a25b8d92a49066f26fcc68cd7d578971e8b 100644 (file)
--- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini
+++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini
@@ -63,6 +63,7 @@ progress_interval=0
  stageTracing=false
  stageWidth=1
  system=system
+threadModel=SMT
  tracer=system.cpu.tracer
  workload=system.cpu.workload
  dcache_port=system.cpu.dcache.cpu_side
@@ -78,7 +79,6 @@ hash_delay=1
  latency=1000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=10000
@@ -113,7 +113,6 @@ hash_delay=1
  latency=1000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=10000
@@ -148,7 +147,6 @@ hash_delay=1
  latency=10000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=100000
diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout

index b0f68db24050db6af2146c162243345f55ddbde4..2c2b59190b06fad7bb7a5ec8e471911ed302d2d6 100755 (executable)
--- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout
+++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout
@@ -5,10 +5,10 @@ The Regents of The University of Michigan
  All Rights Reserved
  
  
-M5 compiled Jul  4 2009 20:43:52
-M5 revision 20167772fb15 6281 default tip
-M5 started Jul  4 2009 20:43:52
-M5 executing on tater
+M5 compiled Jan 30 2010 14:58:44
+M5 revision 4b602939e245 6707 default inorder_vortex_alpha qtip tip
+M5 started Jan 30 2010 14:58:45
+M5 executing on zooks
  command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/50.vortex/alpha/tru64/inorder-timing
  Global frequency set at 1000000000000 ticks per second
  info: Entering event queue @ 0.  Starting simulation...
diff --git a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt

index 2791e3ab6893122412fce2461cd74facfe6e0aa0..f03c6675205cc14b2045c67626af5c5331f4e812 100644 (file)
--- a/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt
+++ b/tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt
@@ -1,88 +1,87 @@
  
  ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  66323                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 296324                       # Number of bytes of host memory used
-host_seconds                                  1331.98                       # Real time elapsed on the host
-host_tick_rate                               81990812                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  51950                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 166756                       # Number of bytes of host memory used
+host_seconds                                  1700.48                       # Real time elapsed on the host
+host_tick_rate                               63220517                       # Simulator tick rate (ticks/s)
  sim_freq                                 1000000000000                       # Frequency of simulated ticks
  sim_insts                                    88340673                       # Number of instructions simulated
-sim_seconds                                  0.109210                       # Number of seconds simulated
-sim_ticks                                109210014500                       # Number of ticks simulated
+sim_seconds                                  0.107505                       # Number of seconds simulated
+sim_ticks                                107505320500                       # Number of ticks simulated
  system.cpu.AGEN-Unit.instReqsProcessed       35224018                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.instReqsProcessed     88340674                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.predictedNotTaken     10443271                       # Number of Branches Predicted As Not Taken (False).
-system.cpu.Branch-Predictor.predictedTaken      3311206                       # Number of Branches Predicted As Taken (True).
-system.cpu.Decode-Unit.instReqsProcessed     88340674                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.instReqsProcessed     88523379                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.predictedNotTaken     10466150                       # Number of Branches Predicted As Not Taken (False).
+system.cpu.Branch-Predictor.predictedTaken      3314731                       # Number of Branches Predicted As Taken (True).
+system.cpu.Decode-Unit.instReqsProcessed     88523379                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.cyclesExecuted     53070972                       # Number of Cycles Execution Unit was used.
  system.cpu.Execution-Unit.instReqsProcessed     53075554                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Execution-Unit.predictedNotTakenIncorrect      4515835                       # Number of Branches Incorrectly Predicted As Not Taken).
-system.cpu.Execution-Unit.predictedTakenIncorrect      1659774                       # Number of Branches Incorrectly Predicted As Taken.
-system.cpu.Fetch-Buffer-T0.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T0.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Buffer-T1.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T1.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Seq-Unit.instReqsProcessed    184507615                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.predictedNotTakenIncorrect      4515839                       # Number of Branches Incorrectly Predicted As Not Taken).
+system.cpu.Execution-Unit.predictedTakenIncorrect      1659770                       # Number of Branches Incorrectly Predicted As Taken.
+system.cpu.Execution-Unit.utilization        0.246830                       # Utilization of Execution Unit (cycles / totalCycles).
+system.cpu.Fetch-Seq-Unit.instReqsProcessed    186350086                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Graduation-Unit.instReqsProcessed     88340673                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.divInstReqsProcessed            0                       # Number of Divide Requests Processed.
  system.cpu.Mult-Div-Unit.instReqsProcessed        82202                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.multInstReqsProcessed        41101                       # Number of Multiply Requests Processed.
-system.cpu.RegFile-Manager.instReqsProcessed    158796488                       # Number of Instructions Requests that completed in this resource.
+system.cpu.RegFile-Manager.instReqsProcessed    165783241                       # Number of Instructions Requests that completed in this resource.
+system.cpu.activity                         86.931340                       # Percentage of cycles cpu is active
  system.cpu.committedInsts                    88340673                       # Number of Instructions Simulated (Per-Thread)
  system.cpu.committedInsts_total              88340673                       # Number of Instructions Simulated (Total)
-system.cpu.cpi                               2.472474                       # CPI: Cycles Per Instruction (Per-Thread)
-system.cpu.cpi_total                         2.472474                       # CPI: Total CPI of All Threads
+system.cpu.contextSwitches                          1                       # Number of context switches
+system.cpu.cpi                               2.433881                       # CPI: Cycles Per Instruction (Per-Thread)
+system.cpu.cpi_total                         2.433881                       # CPI: Total CPI of All Threads
  system.cpu.dcache.ReadReq_accesses           20276638                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 38181.240129                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 35069.166968                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits               20215854                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency     2320808500                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.002998                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                60784                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                18                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency   2131013000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_avg_miss_latency 38009.956226                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 34917.034197                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits               20215872                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency     2309713000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.002997                       # miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_misses                60766                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency   2121768500                       # number of ReadReq MSHR miss cycles
  system.cpu.dcache.ReadReq_mshr_miss_rate     0.002997                       # mshr miss rate for ReadReq accesses
  system.cpu.dcache.ReadReq_mshr_misses           60766                       # number of ReadReq MSHR misses
  system.cpu.dcache.WriteReq_accesses          14613377                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 56049.825426                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53049.825426                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 56040.926479                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53040.926479                       # average WriteReq mshr miss latency
  system.cpu.dcache.WriteReq_hits              14463584                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency    8395871500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency    8394538500                       # number of WriteReq miss cycles
  system.cpu.dcache.WriteReq_miss_rate         0.010250                       # miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_misses              149793                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency   7946492500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency   7945159500                       # number of WriteReq MSHR miss cycles
  system.cpu.dcache.WriteReq_mshr_miss_rate     0.010250                       # mshr miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_mshr_misses         149793                       # number of WriteReq MSHR misses
  system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.dcache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                 169.741509                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                 169.741568                       # Average number of references to valid blocks.
  system.cpu.dcache.blocked::no_mshrs                 0                       # number of cycles access was blocked
  system.cpu.dcache.blocked::no_targets               0                       # number of cycles access was blocked
  system.cpu.dcache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
  system.cpu.dcache.demand_accesses            34890015                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 50891.977756                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 47860.720748                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                34679438                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency     10716680000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_avg_miss_latency 50837.302134                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 47810.485422                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                34679456                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency     10704251500                       # number of demand (read+write) miss cycles
  system.cpu.dcache.demand_miss_rate           0.006035                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                210577                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                 18                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency  10077505500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_misses                210559                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency  10066928000                       # number of demand (read+write) MSHR miss cycles
  system.cpu.dcache.demand_mshr_miss_rate      0.006035                       # mshr miss rate for demand accesses
  system.cpu.dcache.demand_mshr_misses           210559                       # number of demand (read+write) MSHR misses
  system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
  system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
  system.cpu.dcache.overall_accesses           34890015                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 50891.977756                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 47860.720748                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency 50837.302134                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 47810.485422                       # average overall mshr miss latency
  system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits               34679438                       # number of overall hits
-system.cpu.dcache.overall_miss_latency    10716680000                       # number of overall miss cycles
+system.cpu.dcache.overall_hits               34679456                       # number of overall hits
+system.cpu.dcache.overall_miss_latency    10704251500                       # number of overall miss cycles
  system.cpu.dcache.overall_miss_rate          0.006035                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses               210577                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits                18                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency  10077505500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_misses               210559                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency  10066928000                       # number of overall MSHR miss cycles
  system.cpu.dcache.overall_mshr_miss_rate     0.006035                       # mshr miss rate for overall accesses
  system.cpu.dcache.overall_mshr_misses          210559                       # number of overall MSHR misses
  system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -90,9 +89,9 @@ system.cpu.dcache.overall_mshr_uncacheable_misses            0
  system.cpu.dcache.replacements                 200248                       # number of replacements
  system.cpu.dcache.sampled_refs                 204344                       # Sample count of references to valid blocks.
  system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse               4077.182458                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                 34685659                       # Total number of references to valid blocks.
-system.cpu.dcache.warmup_cycle              848449000                       # Cycle when the warmup percentage was hit.
+system.cpu.dcache.tagsinuse               4076.864414                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                 34685671                       # Total number of references to valid blocks.
+system.cpu.dcache.warmup_cycle              848885000                       # Cycle when the warmup percentage was hit.
  system.cpu.dcache.writebacks                   147714                       # number of writebacks
  system.cpu.dcache_port.instReqsProcessed     35224018                       # Number of Instructions Requests that completed in this resource.
  system.cpu.dtb.data_accesses                 34987415                       # DTB accesses
@@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses                14620629                       # DT
  system.cpu.dtb.write_acv                            0                       # DTB write access violations
  system.cpu.dtb.write_hits                    14613377                       # DTB write hits
  system.cpu.dtb.write_misses                      7252                       # DTB write misses
-system.cpu.icache.ReadReq_accesses           96166938                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 19084.949617                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 15849.033723                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits               96087744                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency     1511413500                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_accesses           97826463                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 19024.038820                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 15840.795350                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits               97745885                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency     1532919000                       # number of ReadReq miss cycles
  system.cpu.icache.ReadReq_miss_rate          0.000824                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                79194                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits              1266                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency   1235083500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.000810                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                80578                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits              2650                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency   1234441500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.000797                       # mshr miss rate for ReadReq accesses
  system.cpu.icache.ReadReq_mshr_misses           77928                       # number of ReadReq MSHR misses
  system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                1233.032338                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                1254.310197                       # Average number of references to valid blocks.
  system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
  system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses            96166938                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 19084.949617                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 15849.033723                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                96087744                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency      1511413500                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_accesses            97826463                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 19024.038820                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 15840.795350                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                97745885                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency      1532919000                       # number of demand (read+write) miss cycles
  system.cpu.icache.demand_miss_rate           0.000824                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                 79194                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits               1266                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency   1235083500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.000810                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_misses                 80578                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits               2650                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency   1234441500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.000797                       # mshr miss rate for demand accesses
  system.cpu.icache.demand_mshr_misses            77928                       # number of demand (read+write) MSHR misses
  system.cpu.icache.fast_writes                       0                       # number of fast writes performed
  system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses           96166938                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 19084.949617                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 15849.033723                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses           97826463                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 19024.038820                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 15840.795350                       # average overall mshr miss latency
  system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits               96087744                       # number of overall hits
-system.cpu.icache.overall_miss_latency     1511413500                       # number of overall miss cycles
+system.cpu.icache.overall_hits               97745885                       # number of overall hits
+system.cpu.icache.overall_miss_latency     1532919000                       # number of overall miss cycles
  system.cpu.icache.overall_miss_rate          0.000824                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                79194                       # number of overall misses
-system.cpu.icache.overall_mshr_hits              1266                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency   1235083500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.000810                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_misses                80578                       # number of overall misses
+system.cpu.icache.overall_mshr_hits              2650                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency   1234441500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.000797                       # mshr miss rate for overall accesses
  system.cpu.icache.overall_mshr_misses           77928                       # number of overall MSHR misses
  system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
  system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
  system.cpu.icache.replacements                  75882                       # number of replacements
  system.cpu.icache.sampled_refs                  77928                       # Sample count of references to valid blocks.
  system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse               1874.320715                       # Cycle average of tags in use
-system.cpu.icache.total_refs                 96087744                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse               1873.747475                       # Cycle average of tags in use
+system.cpu.icache.total_refs                 97745885                       # Total number of references to valid blocks.
  system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.icache_port.instReqsProcessed     96166940                       # Number of Instructions Requests that completed in this resource.
-system.cpu.ipc                               0.404453                       # IPC: Instructions Per Cycle (Per-Thread)
-system.cpu.ipc_total                         0.404453                       # IPC: Total IPC of All Threads
+system.cpu.icache_port.instReqsProcessed     97826706                       # Number of Instructions Requests that completed in this resource.
+system.cpu.idleCycles                        28099010                       # Number of cycles cpu's stages were not processed
+system.cpu.ipc                               0.410867                       # IPC: Instructions Per Cycle (Per-Thread)
+system.cpu.ipc_total                         0.410867                       # IPC: Total IPC of All Threads
  system.cpu.itb.data_accesses                        0                       # DTB accesses
  system.cpu.itb.data_acv                             0                       # DTB access violations
  system.cpu.itb.data_hits                            0                       # DTB hits
  system.cpu.itb.data_misses                          0                       # DTB misses
-system.cpu.itb.fetch_accesses                96170872                       # ITB accesses
+system.cpu.itb.fetch_accesses                97830397                       # ITB accesses
  system.cpu.itb.fetch_acv                            0                       # ITB acv
-system.cpu.itb.fetch_hits                    96166938                       # ITB hits
+system.cpu.itb.fetch_hits                    97826463                       # ITB hits
  system.cpu.itb.fetch_misses                      3934                       # ITB misses
  system.cpu.itb.read_accesses                        0                       # DTB read accesses
  system.cpu.itb.read_acv                             0                       # DTB read access violations
@@ -185,31 +185,31 @@ system.cpu.itb.write_acv                            0                       # DT
  system.cpu.itb.write_hits                           0                       # DTB write hits
  system.cpu.itb.write_misses                         0                       # DTB write misses
  system.cpu.l2cache.ReadExReq_accesses          143578                       # number of ReadExReq accesses(hits+misses)
-system.cpu.l2cache.ReadExReq_avg_miss_latency 52038.849963                       # average ReadExReq miss latency
-system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.083578                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency   7471634000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_avg_miss_latency 52034.768558                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000.222875                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency   7471048000                       # number of ReadExReq miss cycles
  system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_misses            143578                       # number of ReadExReq misses
-system.cpu.l2cache.ReadExReq_mshr_miss_latency   5743132000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_latency   5743152000                       # number of ReadExReq MSHR miss cycles
  system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_mshr_misses       143578                       # number of ReadExReq MSHR misses
  system.cpu.l2cache.ReadReq_accesses            138694                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 52316.057051                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40003.485162                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency 52087.681159                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40004.623879                       # average ReadReq mshr miss latency
  system.cpu.l2cache.ReadReq_hits                 95224                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency    2274179000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency    2264251500                       # number of ReadReq miss cycles
  system.cpu.l2cache.ReadReq_miss_rate         0.313424                       # miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_misses               43470                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency   1738951500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency   1739001000                       # number of ReadReq MSHR miss cycles
  system.cpu.l2cache.ReadReq_mshr_miss_rate     0.313424                       # mshr miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_mshr_misses          43470                       # number of ReadReq MSHR misses
  system.cpu.l2cache.UpgradeReq_accesses           6215                       # number of UpgradeReq accesses(hits+misses)
-system.cpu.l2cache.UpgradeReq_avg_miss_latency 51993.805310                       # average UpgradeReq miss latency
-system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40000.884956                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency    323141500                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_avg_miss_latency 51862.831858                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40002.815768                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency    322327500                       # number of UpgradeReq miss cycles
  system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_misses             6215                       # number of UpgradeReq misses
-system.cpu.l2cache.UpgradeReq_mshr_miss_latency    248605500                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency    248617500                       # number of UpgradeReq MSHR miss cycles
  system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_mshr_misses         6215                       # number of UpgradeReq MSHR misses
  system.cpu.l2cache.Writeback_accesses          147714                       # number of Writeback accesses(hits+misses)
@@ -223,29 +223,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs            0                       #
  system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
  system.cpu.l2cache.demand_accesses             282272                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 52103.272957                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 40000.874107                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency 52047.065459                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 40001.245670                       # average overall mshr miss latency
  system.cpu.l2cache.demand_hits                  95224                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency     9745813000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency     9735299500                       # number of demand (read+write) miss cycles
  system.cpu.l2cache.demand_miss_rate          0.662652                       # miss rate for demand accesses
  system.cpu.l2cache.demand_misses               187048                       # number of demand (read+write) misses
  system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency   7482083500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency   7482153000                       # number of demand (read+write) MSHR miss cycles
  system.cpu.l2cache.demand_mshr_miss_rate     0.662652                       # mshr miss rate for demand accesses
  system.cpu.l2cache.demand_mshr_misses          187048                       # number of demand (read+write) MSHR misses
  system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
  system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
  system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
  system.cpu.l2cache.overall_accesses            282272                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 52103.272957                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 40000.874107                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency 52047.065459                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 40001.245670                       # average overall mshr miss latency
  system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
  system.cpu.l2cache.overall_hits                 95224                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency    9745813000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency    9735299500                       # number of overall miss cycles
  system.cpu.l2cache.overall_miss_rate         0.662652                       # miss rate for overall accesses
  system.cpu.l2cache.overall_misses              187048                       # number of overall misses
  system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency   7482083500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency   7482153000                       # number of overall MSHR miss cycles
  system.cpu.l2cache.overall_mshr_miss_rate     0.662652                       # mshr miss rate for overall accesses
  system.cpu.l2cache.overall_mshr_misses         187048                       # number of overall MSHR misses
  system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -253,16 +253,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses            0
  system.cpu.l2cache.replacements                147733                       # number of replacements
  system.cpu.l2cache.sampled_refs                172939                       # Sample count of references to valid blocks.
  system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse             18262.944082                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse             18257.402494                       # Cycle average of tags in use
  system.cpu.l2cache.total_refs                  110306                       # Total number of references to valid blocks.
  system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
  system.cpu.l2cache.writebacks                  120636                       # number of writebacks
-system.cpu.numCycles                        218420030                       # number of cpu cycles simulated
+system.cpu.numCycles                        215010642                       # number of cpu cycles simulated
+system.cpu.runCycles                        186911632                       # Number of cycles cpu stages are processed.
  system.cpu.smtCommittedInsts                        0                       # Number of SMT Instructions Simulated (Per-Thread)
-system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
+system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was in SMT-mode
  system.cpu.smt_cpi                           no_value                       # CPI: Total SMT-CPI
  system.cpu.smt_ipc                           no_value                       # IPC: Total SMT-IPC
-system.cpu.threadCycles                     218420030                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
+system.cpu.stage-0.idleCycles               117180245                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-0.runCycles                 97830397                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-0.utilization              45.500258                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-1.idleCycles               126487263                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-1.runCycles                 88523379                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-1.utilization              41.171627                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-2.idleCycles               125185318                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-2.runCycles                 89825324                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-2.utilization              41.777153                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-3.idleCycles               179779372                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-3.runCycles                 35231270                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-3.utilization              16.385826                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-4.idleCycles               126669969                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-4.runCycles                 88340673                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-4.utilization              41.086651                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.threadCycles                     215010642                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
  system.cpu.workload.PROG:num_syscalls            4583                       # Number of system calls
  
  ---------- End Simulation Statistics   ----------
diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini

index f04bd741b98b63e89283e676ef16eb4ab8273654..ca2d0ba7ecb27b5f035ac4dd781b4e7691aba3e4 100644 (file)
--- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini
+++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini
@@ -63,6 +63,7 @@ progress_interval=0
  stageTracing=false
  stageWidth=1
  system=system
+threadModel=SMT
  tracer=system.cpu.tracer
  workload=system.cpu.workload
  dcache_port=system.cpu.dcache.cpu_side
@@ -78,7 +79,6 @@ hash_delay=1
  latency=1000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=10000
@@ -113,7 +113,6 @@ hash_delay=1
  latency=1000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=10000
@@ -148,7 +147,6 @@ hash_delay=1
  latency=10000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=100000
diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout

index 4a762fa1c898c24ec9f6e40446325485d39fe769..309f6bf402b512dddb68e8c729f2042d89eecee5 100755 (executable)
--- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout
+++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout
@@ -5,10 +5,10 @@ The Regents of The University of Michigan
  All Rights Reserved
  
  
-M5 compiled Jul  4 2009 20:43:52
-M5 revision 20167772fb15 6281 default tip
-M5 started Jul  4 2009 20:43:52
-M5 executing on tater
+M5 compiled Jan 29 2010 09:29:58
+M5 revision a196f8cf520a 6706 default qtip tip inorder_twolf_alpha
+M5 started Jan 29 2010 09:31:14
+M5 executing on zooks
  command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing
  Couldn't unlink  build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sav
  Couldn't unlink  build/ALPHA_SE/tests/fast/long/70.twolf/alpha/tru64/inorder-timing/smred.sv2
diff --git a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt

index c58b2a060cf8a02000e4c22b30dd191c532aae43..0453fd079482833b5435c7cc58d2cd2cda032cd7 100644 (file)
--- a/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt
+++ b/tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt
@@ -1,88 +1,87 @@
  
  ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  69440                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 210892                       # Number of bytes of host memory used
-host_seconds                                  1323.48                       # Real time elapsed on the host
-host_tick_rate                               76516395                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  55182                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 156168                       # Number of bytes of host memory used
+host_seconds                                  1665.47                       # Real time elapsed on the host
+host_tick_rate                               59164617                       # Simulator tick rate (ticks/s)
  sim_freq                                 1000000000000                       # Frequency of simulated ticks
  sim_insts                                    91903056                       # Number of instructions simulated
-sim_seconds                                  0.101268                       # Number of seconds simulated
-sim_ticks                                101268061000                       # Number of ticks simulated
+sim_seconds                                  0.098537                       # Number of seconds simulated
+sim_ticks                                 98536744000                       # Number of ticks simulated
  system.cpu.AGEN-Unit.instReqsProcessed       26537108                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.instReqsProcessed     91903057                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.predictedNotTaken      8198984                       # Number of Branches Predicted As Not Taken (False).
-system.cpu.Branch-Predictor.predictedTaken      2041701                       # Number of Branches Predicted As Taken (True).
-system.cpu.Decode-Unit.instReqsProcessed     91903057                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.instReqsProcessed     92657148                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.predictedNotTaken      8232810                       # Number of Branches Predicted As Not Taken (False).
+system.cpu.Branch-Predictor.predictedTaken      2041716                       # Number of Branches Predicted As Taken (True).
+system.cpu.Decode-Unit.instReqsProcessed     92657148                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.cyclesExecuted     64907308                       # Number of Cycles Execution Unit was used.
  system.cpu.Execution-Unit.instReqsProcessed     64907696                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Execution-Unit.predictedNotTakenIncorrect      3739118                       # Number of Branches Incorrectly Predicted As Not Taken).
  system.cpu.Execution-Unit.predictedTakenIncorrect      1029596                       # Number of Branches Incorrectly Predicted As Taken.
-system.cpu.Fetch-Buffer-T0.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T0.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Buffer-T1.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T1.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Seq-Unit.instReqsProcessed    189586934                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.utilization        0.329356                       # Utilization of Execution Unit (cycles / totalCycles).
+system.cpu.Fetch-Seq-Unit.instReqsProcessed    191370621                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Graduation-Unit.instReqsProcessed     91903056                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.divInstReqsProcessed            0                       # Number of Divide Requests Processed.
  system.cpu.Mult-Div-Unit.instReqsProcessed       916504                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.multInstReqsProcessed       458252                       # Number of Multiply Requests Processed.
-system.cpu.RegFile-Manager.instReqsProcessed    188816950                       # Number of Instructions Requests that completed in this resource.
+system.cpu.RegFile-Manager.instReqsProcessed    196152134                       # Number of Instructions Requests that completed in this resource.
+system.cpu.activity                         96.743392                       # Percentage of cycles cpu is active
  system.cpu.committedInsts                    91903056                       # Number of Instructions Simulated (Per-Thread)
  system.cpu.committedInsts_total              91903056                       # Number of Instructions Simulated (Total)
-system.cpu.cpi                               2.203802                       # CPI: Cycles Per Instruction (Per-Thread)
-system.cpu.cpi_total                         2.203802                       # CPI: Total CPI of All Threads
+system.cpu.contextSwitches                          1                       # Number of context switches
+system.cpu.cpi                               2.144363                       # CPI: Cycles Per Instruction (Per-Thread)
+system.cpu.cpi_total                         2.144363                       # CPI: Total CPI of All Threads
  system.cpu.dcache.ReadReq_accesses           19996198                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 51623.700624                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48550.526316                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits               19995717                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency       24831000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_avg_miss_latency 51569.473684                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 48547.368421                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits               19995723                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency       24495500                       # number of ReadReq miss cycles
  system.cpu.dcache.ReadReq_miss_rate          0.000024                       # miss rate for ReadReq accesses
-system.cpu.dcache.ReadReq_misses                  481                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_hits                 6                       # number of ReadReq MSHR hits
-system.cpu.dcache.ReadReq_mshr_miss_latency     23061500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_misses                  475                       # number of ReadReq misses
+system.cpu.dcache.ReadReq_mshr_miss_latency     23060000                       # number of ReadReq MSHR miss cycles
  system.cpu.dcache.ReadReq_mshr_miss_rate     0.000024                       # mshr miss rate for ReadReq accesses
  system.cpu.dcache.ReadReq_mshr_misses             475                       # number of ReadReq MSHR misses
  system.cpu.dcache.WriteReq_accesses           6501103                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 56415.277031                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53415.277031                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 56295.857988                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53295.857988                       # average WriteReq mshr miss latency
  system.cpu.dcache.WriteReq_hits               6499244                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency     104876000                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency     104654000                       # number of WriteReq miss cycles
  system.cpu.dcache.WriteReq_miss_rate         0.000286                       # miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_misses                1859                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency     99299000                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency     99077000                       # number of WriteReq MSHR miss cycles
  system.cpu.dcache.WriteReq_mshr_miss_rate     0.000286                       # mshr miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_mshr_misses           1859                       # number of WriteReq MSHR misses
  system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.dcache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs               11918.612686                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs               11918.613585                       # Average number of references to valid blocks.
  system.cpu.dcache.blocked::no_mshrs                 0                       # number of cycles access was blocked
  system.cpu.dcache.blocked::no_targets               0                       # number of cycles access was blocked
  system.cpu.dcache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
  system.cpu.dcache.demand_accesses            26497301                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 55430.341880                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 52425.235647                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                26494961                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency       129707000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_avg_miss_latency 55333.976007                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 52329.477292                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                26494967                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency       129149500                       # number of demand (read+write) miss cycles
  system.cpu.dcache.demand_miss_rate           0.000088                       # miss rate for demand accesses
-system.cpu.dcache.demand_misses                  2340                       # number of demand (read+write) misses
-system.cpu.dcache.demand_mshr_hits                  6                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency    122360500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_misses                  2334                       # number of demand (read+write) misses
+system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
+system.cpu.dcache.demand_mshr_miss_latency    122137000                       # number of demand (read+write) MSHR miss cycles
  system.cpu.dcache.demand_mshr_miss_rate      0.000088                       # mshr miss rate for demand accesses
  system.cpu.dcache.demand_mshr_misses             2334                       # number of demand (read+write) MSHR misses
  system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
  system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
  system.cpu.dcache.overall_accesses           26497301                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 55430.341880                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 52425.235647                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency 55333.976007                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 52329.477292                       # average overall mshr miss latency
  system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits               26494961                       # number of overall hits
-system.cpu.dcache.overall_miss_latency      129707000                       # number of overall miss cycles
+system.cpu.dcache.overall_hits               26494967                       # number of overall hits
+system.cpu.dcache.overall_miss_latency      129149500                       # number of overall miss cycles
  system.cpu.dcache.overall_miss_rate          0.000088                       # miss rate for overall accesses
-system.cpu.dcache.overall_misses                 2340                       # number of overall misses
-system.cpu.dcache.overall_mshr_hits                 6                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency    122360500                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_misses                 2334                       # number of overall misses
+system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
+system.cpu.dcache.overall_mshr_miss_latency    122137000                       # number of overall MSHR miss cycles
  system.cpu.dcache.overall_mshr_miss_rate     0.000088                       # mshr miss rate for overall accesses
  system.cpu.dcache.overall_mshr_misses            2334                       # number of overall MSHR misses
  system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -90,8 +89,8 @@ system.cpu.dcache.overall_mshr_uncacheable_misses            0
  system.cpu.dcache.replacements                    157                       # number of replacements
  system.cpu.dcache.sampled_refs                   2223                       # Sample count of references to valid blocks.
  system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse               1441.819572                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                 26495076                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse               1441.684134                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                 26495078                       # Total number of references to valid blocks.
  system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.dcache.writebacks                      104                       # number of writebacks
  system.cpu.dcache_port.instReqsProcessed     26537108                       # Number of Instructions Requests that completed in this resource.
@@ -111,70 +110,71 @@ system.cpu.dtb.write_accesses                 6501126                       # DT
  system.cpu.dtb.write_acv                            0                       # DTB write access violations
  system.cpu.dtb.write_hits                     6501103                       # DTB write hits
  system.cpu.dtb.write_misses                        23                       # DTB write misses
-system.cpu.icache.ReadReq_accesses           97683877                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 27282.787360                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 24026.266636                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits               97675238                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency      235696000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_accesses           98713473                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 27258.057090                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 23994.339402                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits               98704785                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency      236818000                       # number of ReadReq miss cycles
  system.cpu.icache.ReadReq_miss_rate          0.000088                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                 8639                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                73                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency    205809000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.000088                       # mshr miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_mshr_misses            8566                       # number of ReadReq MSHR misses
+system.cpu.icache.ReadReq_misses                 8688                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits               120                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency    205583500                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.000087                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_misses            8568                       # number of ReadReq MSHR misses
  system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs               11402.666122                       # Average number of references to valid blocks.
+system.cpu.icache.avg_blocked_cycles::no_targets         1000                       # average number of cycles each access was blocked
+system.cpu.icache.avg_refs               11520.166317                       # Average number of references to valid blocks.
  system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
-system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
+system.cpu.icache.blocked::no_targets               1                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
-system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
+system.cpu.icache.blocked_cycles::no_targets         1000                       # number of cycles access was blocked
  system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses            97683877                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 27282.787360                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 24026.266636                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                97675238                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency       235696000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_accesses            98713473                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 27258.057090                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 23994.339402                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                98704785                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency       236818000                       # number of demand (read+write) miss cycles
  system.cpu.icache.demand_miss_rate           0.000088                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                  8639                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                 73                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency    205809000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.000088                       # mshr miss rate for demand accesses
-system.cpu.icache.demand_mshr_misses             8566                       # number of demand (read+write) MSHR misses
+system.cpu.icache.demand_misses                  8688                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                120                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency    205583500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.000087                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_misses             8568                       # number of demand (read+write) MSHR misses
  system.cpu.icache.fast_writes                       0                       # number of fast writes performed
  system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses           97683877                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 27282.787360                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 24026.266636                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses           98713473                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 27258.057090                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 23994.339402                       # average overall mshr miss latency
  system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits               97675238                       # number of overall hits
-system.cpu.icache.overall_miss_latency      235696000                       # number of overall miss cycles
+system.cpu.icache.overall_hits               98704785                       # number of overall hits
+system.cpu.icache.overall_miss_latency      236818000                       # number of overall miss cycles
  system.cpu.icache.overall_miss_rate          0.000088                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                 8639                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                73                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency    205809000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.000088                       # mshr miss rate for overall accesses
-system.cpu.icache.overall_mshr_misses            8566                       # number of overall MSHR misses
+system.cpu.icache.overall_misses                 8688                       # number of overall misses
+system.cpu.icache.overall_mshr_hits               120                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency    205583500                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.000087                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_misses            8568                       # number of overall MSHR misses
  system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
  system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
-system.cpu.icache.replacements                   6732                       # number of replacements
-system.cpu.icache.sampled_refs                   8566                       # Sample count of references to valid blocks.
+system.cpu.icache.replacements                   6734                       # number of replacements
+system.cpu.icache.sampled_refs                   8568                       # Sample count of references to valid blocks.
  system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse               1428.614683                       # Cycle average of tags in use
-system.cpu.icache.total_refs                 97675238                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse               1428.229557                       # Cycle average of tags in use
+system.cpu.icache.total_refs                 98704785                       # Total number of references to valid blocks.
  system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.icache_port.instReqsProcessed     97683876                       # Number of Instructions Requests that completed in this resource.
-system.cpu.ipc                               0.453761                       # IPC: Instructions Per Cycle (Per-Thread)
-system.cpu.ipc_total                         0.453761                       # IPC: Total IPC of All Threads
+system.cpu.icache_port.instReqsProcessed     98713472                       # Number of Instructions Requests that completed in this resource.
+system.cpu.idleCycles                         6417911                       # Number of cycles cpu's stages were not processed
+system.cpu.ipc                               0.466339                       # IPC: Instructions Per Cycle (Per-Thread)
+system.cpu.ipc_total                         0.466339                       # IPC: Total IPC of All Threads
  system.cpu.itb.data_accesses                        0                       # DTB accesses
  system.cpu.itb.data_acv                             0                       # DTB access violations
  system.cpu.itb.data_hits                            0                       # DTB hits
  system.cpu.itb.data_misses                          0                       # DTB misses
-system.cpu.itb.fetch_accesses                97683924                       # ITB accesses
+system.cpu.itb.fetch_accesses                98713520                       # ITB accesses
  system.cpu.itb.fetch_acv                            0                       # ITB acv
-system.cpu.itb.fetch_hits                    97683877                       # ITB hits
+system.cpu.itb.fetch_hits                    98713473                       # ITB hits
  system.cpu.itb.fetch_misses                        47                       # ITB misses
  system.cpu.itb.read_accesses                        0                       # DTB read accesses
  system.cpu.itb.read_acv                             0                       # DTB read access violations
@@ -185,84 +185,100 @@ system.cpu.itb.write_acv                            0                       # DT
  system.cpu.itb.write_hits                           0                       # DTB write hits
  system.cpu.itb.write_misses                         0                       # DTB write misses
  system.cpu.l2cache.ReadExReq_accesses            1748                       # number of ReadExReq accesses(hits+misses)
-system.cpu.l2cache.ReadExReq_avg_miss_latency 52413.043478                       # average ReadExReq miss latency
-system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40003.432494                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency     91618000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_avg_miss_latency 52296.624714                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40005.720824                       # average ReadExReq mshr miss latency
+system.cpu.l2cache.ReadExReq_miss_latency     91414500                       # number of ReadExReq miss cycles
  system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_misses              1748                       # number of ReadExReq misses
-system.cpu.l2cache.ReadExReq_mshr_miss_latency     69926000                       # number of ReadExReq MSHR miss cycles
+system.cpu.l2cache.ReadExReq_mshr_miss_latency     69930000                       # number of ReadExReq MSHR miss cycles
  system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_mshr_misses         1748                       # number of ReadExReq MSHR misses
-system.cpu.l2cache.ReadReq_accesses              9041                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 52240.613777                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40013.548808                       # average ReadReq mshr miss latency
-system.cpu.l2cache.ReadReq_hits                  5978                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency     160013000                       # number of ReadReq miss cycles
-system.cpu.l2cache.ReadReq_miss_rate         0.338790                       # miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_accesses              9043                       # number of ReadReq accesses(hits+misses)
+system.cpu.l2cache.ReadReq_avg_miss_latency 52161.443030                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40020.078355                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_hits                  5980                       # number of ReadReq hits
+system.cpu.l2cache.ReadReq_miss_latency     159770500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_rate         0.338715                       # miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_misses                3063                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency    122561500                       # number of ReadReq MSHR miss cycles
-system.cpu.l2cache.ReadReq_mshr_miss_rate     0.338790                       # mshr miss rate for ReadReq accesses
+system.cpu.l2cache.ReadReq_mshr_miss_latency    122581500                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_rate     0.338715                       # mshr miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_mshr_misses           3063                       # number of ReadReq MSHR misses
  system.cpu.l2cache.UpgradeReq_accesses            111                       # number of UpgradeReq accesses(hits+misses)
-system.cpu.l2cache.UpgradeReq_avg_miss_latency 52414.414414                       # average UpgradeReq miss latency
-system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        40000                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency      5818000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_avg_miss_latency 52216.216216                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40009.009009                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency      5796000                       # number of UpgradeReq miss cycles
  system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_misses              111                       # number of UpgradeReq misses
-system.cpu.l2cache.UpgradeReq_mshr_miss_latency      4440000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency      4441000                       # number of UpgradeReq MSHR miss cycles
  system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_mshr_misses          111                       # number of UpgradeReq MSHR misses
  system.cpu.l2cache.Writeback_accesses             104                       # number of Writeback accesses(hits+misses)
  system.cpu.l2cache.Writeback_hits                 104                       # number of Writeback hits
  system.cpu.l2cache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.l2cache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.l2cache.avg_refs                  1.968317                       # Average number of references to valid blocks.
+system.cpu.l2cache.avg_refs                  1.968977                       # Average number of references to valid blocks.
  system.cpu.l2cache.blocked::no_mshrs                0                       # number of cycles access was blocked
  system.cpu.l2cache.blocked::no_targets              0                       # number of cycles access was blocked
  system.cpu.l2cache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
-system.cpu.l2cache.demand_accesses              10789                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 52303.263355                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 40009.873207                       # average overall mshr miss latency
-system.cpu.l2cache.demand_hits                   5978                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency      251631000                       # number of demand (read+write) miss cycles
-system.cpu.l2cache.demand_miss_rate          0.445917                       # miss rate for demand accesses
+system.cpu.l2cache.demand_accesses              10791                       # number of demand (read+write) accesses
+system.cpu.l2cache.demand_avg_miss_latency 52210.559135                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 40014.861775                       # average overall mshr miss latency
+system.cpu.l2cache.demand_hits                   5980                       # number of demand (read+write) hits
+system.cpu.l2cache.demand_miss_latency      251185000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_rate          0.445834                       # miss rate for demand accesses
  system.cpu.l2cache.demand_misses                 4811                       # number of demand (read+write) misses
  system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency    192487500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.l2cache.demand_mshr_miss_rate     0.445917                       # mshr miss rate for demand accesses
+system.cpu.l2cache.demand_mshr_miss_latency    192511500                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_rate     0.445834                       # mshr miss rate for demand accesses
  system.cpu.l2cache.demand_mshr_misses            4811                       # number of demand (read+write) MSHR misses
  system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
  system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
  system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
-system.cpu.l2cache.overall_accesses             10789                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 52303.263355                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 40009.873207                       # average overall mshr miss latency
+system.cpu.l2cache.overall_accesses             10791                       # number of overall (read+write) accesses
+system.cpu.l2cache.overall_avg_miss_latency 52210.559135                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 40014.861775                       # average overall mshr miss latency
  system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.l2cache.overall_hits                  5978                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency     251631000                       # number of overall miss cycles
-system.cpu.l2cache.overall_miss_rate         0.445917                       # miss rate for overall accesses
+system.cpu.l2cache.overall_hits                  5980                       # number of overall hits
+system.cpu.l2cache.overall_miss_latency     251185000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_rate         0.445834                       # miss rate for overall accesses
  system.cpu.l2cache.overall_misses                4811                       # number of overall misses
  system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency    192487500                       # number of overall MSHR miss cycles
-system.cpu.l2cache.overall_mshr_miss_rate     0.445917                       # mshr miss rate for overall accesses
+system.cpu.l2cache.overall_mshr_miss_latency    192511500                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_rate     0.445834                       # mshr miss rate for overall accesses
  system.cpu.l2cache.overall_mshr_misses           4811                       # number of overall MSHR misses
  system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
  system.cpu.l2cache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
  system.cpu.l2cache.replacements                     0                       # number of replacements
  system.cpu.l2cache.sampled_refs                  3030                       # Sample count of references to valid blocks.
  system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse              2039.371088                       # Cycle average of tags in use
-system.cpu.l2cache.total_refs                    5964                       # Total number of references to valid blocks.
+system.cpu.l2cache.tagsinuse              2038.814805                       # Cycle average of tags in use
+system.cpu.l2cache.total_refs                    5966                       # Total number of references to valid blocks.
  system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
  system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                        202536123                       # number of cpu cycles simulated
+system.cpu.numCycles                        197073489                       # number of cpu cycles simulated
+system.cpu.runCycles                        190655578                       # Number of cycles cpu stages are processed.
  system.cpu.smtCommittedInsts                        0                       # Number of SMT Instructions Simulated (Per-Thread)
-system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
+system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was in SMT-mode
  system.cpu.smt_cpi                           no_value                       # CPI: Total SMT-CPI
  system.cpu.smt_ipc                           no_value                       # IPC: Total SMT-IPC
-system.cpu.threadCycles                     202536123                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
+system.cpu.stage-0.idleCycles                98359969                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-0.runCycles                 98713520                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-0.utilization              50.089700                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-1.idleCycles               104416341                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-1.runCycles                 92657148                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-1.utilization              47.016546                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-2.idleCycles               103581004                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-2.runCycles                 93492485                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-2.utilization              47.440417                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-3.idleCycles               170536358                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-3.runCycles                 26537131                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-3.utilization              13.465602                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-4.idleCycles               105170433                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-4.runCycles                 91903056                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-4.utilization              46.633901                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.threadCycles                     197073489                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
  system.cpu.workload.PROG:num_syscalls             389                       # Number of system calls
  
  ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini

index b30560264af3021e724ecc3a1f8e4c3a27aa7616..5ab5381fc8aa4fd2b4ee1b4aad982bbbb251f6fa 100644 (file)
--- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini
+++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini
@@ -63,6 +63,7 @@ progress_interval=0
  stageTracing=false
  stageWidth=1
  system=system
+threadModel=SMT
  tracer=system.cpu.tracer
  workload=system.cpu.workload
  dcache_port=system.cpu.dcache.cpu_side
@@ -78,7 +79,6 @@ hash_delay=1
  latency=1000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=10000
@@ -113,7 +113,6 @@ hash_delay=1
  latency=1000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=10000
@@ -148,7 +147,6 @@ hash_delay=1
  latency=10000
  max_miss_count=0
  mshrs=10
-prefetch_cache_check_push=true
  prefetch_data_accesses_only=false
  prefetch_degree=1
  prefetch_latency=100000
@@ -190,7 +188,7 @@ egid=100
  env=
  errout=cerr
  euid=100
-executable=tests/test-progs/hello/bin/alpha/linux/hello
+executable=/dist/m5/regression/test-progs/hello/bin/alpha/linux/hello
  gid=100
  input=cin
  max_stack_size=67108864
diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout

index 18efdaa9e3889fb5569a8eee6f3923969c8dff5a..4ad6292c59c4af854d5a26798a5d436584522a35 100755 (executable)
--- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout
+++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
  All Rights Reserved
  
  
-M5 compiled May 12 2009 11:18:39
-M5 revision 21550d38f156 6195 default qtip tip inorder-hello-regress
-M5 started May 12 2009 11:18:40
+M5 compiled Jan 29 2010 09:13:03
+M5 revision 23ae96d82d21+ 6704+ default qtip tip inorder_hello_alpha
+M5 started Jan 29 2010 09:13:04
  M5 executing on zooks
  command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing -re tests/run.py build/ALPHA_SE/tests/fast/quick/00.hello/alpha/linux/inorder-timing
  Global frequency set at 1000000000000 ticks per second
  info: Entering event queue @ 0.  Starting simulation...
  info: Increasing stack size by one page.
  Hello world!
-Exiting @ tick 31646000 because target called exit()
+Exiting @ tick 31286000 because target called exit()
diff --git a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt

index a88b805948081ad01e039931e17076fda45fe893..b9a12afbb8639014f3a676c3ee86910f372d4e97 100644 (file)
--- a/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt
+++ b/tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt
@@ -1,53 +1,53 @@
  
  ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  23793                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 152032                       # Number of bytes of host memory used
-host_seconds                                     0.27                       # Real time elapsed on the host
-host_tick_rate                              117464960                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  23048                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 153228                       # Number of bytes of host memory used
+host_seconds                                     0.28                       # Real time elapsed on the host
+host_tick_rate                              112412599                       # Simulator tick rate (ticks/s)
  sim_freq                                 1000000000000                       # Frequency of simulated ticks
  sim_insts                                        6404                       # Number of instructions simulated
-sim_seconds                                  0.000032                       # Number of seconds simulated
-sim_ticks                                    31646000                       # Number of ticks simulated
+sim_seconds                                  0.000031                       # Number of seconds simulated
+sim_ticks                                    31286000                       # Number of ticks simulated
  system.cpu.AGEN-Unit.instReqsProcessed           2050                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.instReqsProcessed         6405                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Branch-Predictor.predictedNotTaken          909                       # Number of Branches Predicted As Not Taken (False).
-system.cpu.Branch-Predictor.predictedTaken          142                       # Number of Branches Predicted As Taken (True).
-system.cpu.Decode-Unit.instReqsProcessed         6405                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.instReqsProcessed         6581                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Branch-Predictor.predictedNotTaken          924                       # Number of Branches Predicted As Not Taken (False).
+system.cpu.Branch-Predictor.predictedTaken          143                       # Number of Branches Predicted As Taken (True).
+system.cpu.Decode-Unit.instReqsProcessed         6581                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.cyclesExecuted         4340                       # Number of Cycles Execution Unit was used.
  system.cpu.Execution-Unit.instReqsProcessed         4354                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Execution-Unit.predictedNotTakenIncorrect          607                       # Number of Branches Incorrectly Predicted As Not Taken).
-system.cpu.Execution-Unit.predictedTakenIncorrect          124                       # Number of Branches Incorrectly Predicted As Taken.
-system.cpu.Fetch-Buffer-T0.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T0.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Buffer-T1.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T1.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Seq-Unit.instReqsProcessed        13560                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.predictedNotTakenIncorrect          608                       # Number of Branches Incorrectly Predicted As Not Taken).
+system.cpu.Execution-Unit.predictedTakenIncorrect          123                       # Number of Branches Incorrectly Predicted As Taken.
+system.cpu.Execution-Unit.utilization        0.069359                       # Utilization of Execution Unit (cycles / totalCycles).
+system.cpu.Fetch-Seq-Unit.instReqsProcessed        13858                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Graduation-Unit.instReqsProcessed         6404                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.divInstReqsProcessed            0                       # Number of Divide Requests Processed.
  system.cpu.Mult-Div-Unit.instReqsProcessed            2                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.multInstReqsProcessed            1                       # Number of Multiply Requests Processed.
-system.cpu.RegFile-Manager.instReqsProcessed        12884                       # Number of Instructions Requests that completed in this resource.
+system.cpu.RegFile-Manager.instReqsProcessed        19961                       # Number of Instructions Requests that completed in this resource.
+system.cpu.activity                         22.407428                       # Percentage of cycles cpu is active
  system.cpu.committedInsts                        6404                       # Number of Instructions Simulated (Per-Thread)
  system.cpu.committedInsts_total                  6404                       # Number of Instructions Simulated (Total)
-system.cpu.cpi                               9.883354                       # CPI: Cycles Per Instruction (Per-Thread)
-system.cpu.cpi_total                         9.883354                       # CPI: Total CPI of All Threads
+system.cpu.contextSwitches                          1                       # Number of context switches
+system.cpu.cpi                               9.770924                       # CPI: Cycles Per Instruction (Per-Thread)
+system.cpu.cpi_total                         9.770924                       # CPI: Total CPI of All Threads
  system.cpu.dcache.ReadReq_accesses               1185                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 56352.631579                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53352.631579                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_avg_miss_latency 56347.368421                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53347.368421                       # average ReadReq mshr miss latency
  system.cpu.dcache.ReadReq_hits                   1090                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        5353500                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_latency        5353000                       # number of ReadReq miss cycles
  system.cpu.dcache.ReadReq_miss_rate          0.080169                       # miss rate for ReadReq accesses
  system.cpu.dcache.ReadReq_misses                   95                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency      5068500                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_latency      5068000                       # number of ReadReq MSHR miss cycles
  system.cpu.dcache.ReadReq_mshr_miss_rate     0.080169                       # mshr miss rate for ReadReq accesses
  system.cpu.dcache.ReadReq_mshr_misses              95                       # number of ReadReq MSHR misses
  system.cpu.dcache.WriteReq_accesses               865                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 56419.540230                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53419.540230                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 56074.712644                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53074.712644                       # average WriteReq mshr miss latency
  system.cpu.dcache.WriteReq_hits                   778                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       4908500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       4878500                       # number of WriteReq miss cycles
  system.cpu.dcache.WriteReq_miss_rate         0.100578                       # miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_misses                  87                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency      4647500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency      4617500                       # number of WriteReq MSHR miss cycles
  system.cpu.dcache.WriteReq_mshr_miss_rate     0.100578                       # mshr miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_mshr_misses             87                       # number of WriteReq MSHR misses
  system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
@@ -59,29 +59,29 @@ system.cpu.dcache.blocked_cycles::no_mshrs            0                       #
  system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
  system.cpu.dcache.demand_accesses                2050                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 56384.615385                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 53384.615385                       # average overall mshr miss latency
+system.cpu.dcache.demand_avg_miss_latency 56217.032967                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 53217.032967                       # average overall mshr miss latency
  system.cpu.dcache.demand_hits                    1868                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency        10262000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_latency        10231500                       # number of demand (read+write) miss cycles
  system.cpu.dcache.demand_miss_rate           0.088780                       # miss rate for demand accesses
  system.cpu.dcache.demand_misses                   182                       # number of demand (read+write) misses
  system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      9716000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_latency      9685500                       # number of demand (read+write) MSHR miss cycles
  system.cpu.dcache.demand_mshr_miss_rate      0.088780                       # mshr miss rate for demand accesses
  system.cpu.dcache.demand_mshr_misses              182                       # number of demand (read+write) MSHR misses
  system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
  system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
  system.cpu.dcache.overall_accesses               2050                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 56384.615385                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 53384.615385                       # average overall mshr miss latency
+system.cpu.dcache.overall_avg_miss_latency 56217.032967                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 53217.032967                       # average overall mshr miss latency
  system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
  system.cpu.dcache.overall_hits                   1868                       # number of overall hits
-system.cpu.dcache.overall_miss_latency       10262000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_latency       10231500                       # number of overall miss cycles
  system.cpu.dcache.overall_miss_rate          0.088780                       # miss rate for overall accesses
  system.cpu.dcache.overall_misses                  182                       # number of overall misses
  system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      9716000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_latency      9685500                       # number of overall MSHR miss cycles
  system.cpu.dcache.overall_mshr_miss_rate     0.088780                       # mshr miss rate for overall accesses
  system.cpu.dcache.overall_mshr_misses             182                       # number of overall MSHR misses
  system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -89,7 +89,7 @@ system.cpu.dcache.overall_mshr_uncacheable_misses            0
  system.cpu.dcache.replacements                      0                       # number of replacements
  system.cpu.dcache.sampled_refs                    168                       # Sample count of references to valid blocks.
  system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                104.325446                       # Cycle average of tags in use
+system.cpu.dcache.tagsinuse                103.689640                       # Cycle average of tags in use
  system.cpu.dcache.total_refs                     1882                       # Total number of references to valid blocks.
  system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.dcache.writebacks                        0                       # number of writebacks
@@ -110,70 +110,71 @@ system.cpu.dtb.write_accesses                     868                       # DT
  system.cpu.dtb.write_acv                            0                       # DTB write access violations
  system.cpu.dtb.write_hits                         865                       # DTB write hits
  system.cpu.dtb.write_misses                         3                       # DTB write misses
-system.cpu.icache.ReadReq_accesses               7155                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 55763.605442                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 52949.122807                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   6861                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       16394500                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.041090                       # miss rate for ReadReq accesses
-system.cpu.icache.ReadReq_misses                  294                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_hits                 9                       # number of ReadReq MSHR hits
-system.cpu.icache.ReadReq_mshr_miss_latency     15090500                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.039832                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses               7277                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 55521.594684                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 52863.157895                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   6976                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency       16712000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.041363                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_misses                  301                       # number of ReadReq misses
+system.cpu.icache.ReadReq_mshr_hits                16                       # number of ReadReq MSHR hits
+system.cpu.icache.ReadReq_mshr_miss_latency     15066000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.039164                       # mshr miss rate for ReadReq accesses
  system.cpu.icache.ReadReq_mshr_misses             285                       # number of ReadReq MSHR misses
  system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  24.158451                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  24.563380                       # Average number of references to valid blocks.
  system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
  system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                7155                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 55763.605442                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 52949.122807                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    6861                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        16394500                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.041090                       # miss rate for demand accesses
-system.cpu.icache.demand_misses                   294                       # number of demand (read+write) misses
-system.cpu.icache.demand_mshr_hits                  9                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     15090500                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.039832                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_accesses                7277                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 55521.594684                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 52863.157895                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    6976                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency        16712000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.041363                       # miss rate for demand accesses
+system.cpu.icache.demand_misses                   301                       # number of demand (read+write) misses
+system.cpu.icache.demand_mshr_hits                 16                       # number of demand (read+write) MSHR hits
+system.cpu.icache.demand_mshr_miss_latency     15066000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.039164                       # mshr miss rate for demand accesses
  system.cpu.icache.demand_mshr_misses              285                       # number of demand (read+write) MSHR misses
  system.cpu.icache.fast_writes                       0                       # number of fast writes performed
  system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               7155                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 55763.605442                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 52949.122807                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               7277                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 55521.594684                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 52863.157895                       # average overall mshr miss latency
  system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   6861                       # number of overall hits
-system.cpu.icache.overall_miss_latency       16394500                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.041090                       # miss rate for overall accesses
-system.cpu.icache.overall_misses                  294                       # number of overall misses
-system.cpu.icache.overall_mshr_hits                 9                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     15090500                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.039832                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_hits                   6976                       # number of overall hits
+system.cpu.icache.overall_miss_latency       16712000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.041363                       # miss rate for overall accesses
+system.cpu.icache.overall_misses                  301                       # number of overall misses
+system.cpu.icache.overall_mshr_hits                16                       # number of overall MSHR hits
+system.cpu.icache.overall_mshr_miss_latency     15066000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.039164                       # mshr miss rate for overall accesses
  system.cpu.icache.overall_mshr_misses             285                       # number of overall MSHR misses
  system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
  system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
  system.cpu.icache.replacements                      0                       # number of replacements
  system.cpu.icache.sampled_refs                    284                       # Sample count of references to valid blocks.
  system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                131.383181                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     6861                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                130.373495                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     6976                       # Total number of references to valid blocks.
  system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.icache_port.instReqsProcessed         7153                       # Number of Instructions Requests that completed in this resource.
-system.cpu.ipc                               0.101180                       # IPC: Instructions Per Cycle (Per-Thread)
-system.cpu.ipc_total                         0.101180                       # IPC: Total IPC of All Threads
+system.cpu.icache_port.instReqsProcessed         7275                       # Number of Instructions Requests that completed in this resource.
+system.cpu.idleCycles                           48552                       # Number of cycles cpu's stages were not processed
+system.cpu.ipc                               0.102344                       # IPC: Instructions Per Cycle (Per-Thread)
+system.cpu.ipc_total                         0.102344                       # IPC: Total IPC of All Threads
  system.cpu.itb.data_accesses                        0                       # DTB accesses
  system.cpu.itb.data_acv                             0                       # DTB access violations
  system.cpu.itb.data_hits                            0                       # DTB hits
  system.cpu.itb.data_misses                          0                       # DTB misses
-system.cpu.itb.fetch_accesses                    7172                       # ITB accesses
+system.cpu.itb.fetch_accesses                    7294                       # ITB accesses
  system.cpu.itb.fetch_acv                            0                       # ITB acv
-system.cpu.itb.fetch_hits                        7155                       # ITB hits
+system.cpu.itb.fetch_hits                        7277                       # ITB hits
  system.cpu.itb.fetch_misses                        17                       # ITB misses
  system.cpu.itb.read_accesses                        0                       # DTB read accesses
  system.cpu.itb.read_acv                             0                       # DTB read access violations
@@ -184,28 +185,28 @@ system.cpu.itb.write_acv                            0                       # DT
  system.cpu.itb.write_hits                           0                       # DTB write hits
  system.cpu.itb.write_misses                         0                       # DTB write misses
  system.cpu.l2cache.ReadExReq_accesses              73                       # number of ReadExReq accesses(hits+misses)
-system.cpu.l2cache.ReadExReq_avg_miss_latency 52424.657534                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_miss_latency 52075.342466                       # average ReadExReq miss latency
  system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40013.698630                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency      3827000                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_latency      3801500                       # number of ReadExReq miss cycles
  system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_misses                73                       # number of ReadExReq misses
  system.cpu.l2cache.ReadExReq_mshr_miss_latency      2921000                       # number of ReadExReq MSHR miss cycles
  system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_mshr_misses           73                       # number of ReadExReq MSHR misses
  system.cpu.l2cache.ReadReq_accesses               380                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 52118.733509                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39944.591029                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency 52068.601583                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 39945.910290                       # average ReadReq mshr miss latency
  system.cpu.l2cache.ReadReq_hits                     1                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency      19753000                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency      19734000                       # number of ReadReq miss cycles
  system.cpu.l2cache.ReadReq_miss_rate         0.997368                       # miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_misses                 379                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency     15139000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency     15139500                       # number of ReadReq MSHR miss cycles
  system.cpu.l2cache.ReadReq_mshr_miss_rate     0.997368                       # mshr miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_mshr_misses            379                       # number of ReadReq MSHR misses
  system.cpu.l2cache.UpgradeReq_accesses             14                       # number of UpgradeReq accesses(hits+misses)
-system.cpu.l2cache.UpgradeReq_avg_miss_latency 52357.142857                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_miss_latency 52071.428571                       # average UpgradeReq miss latency
  system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency        40000                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency       733000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_miss_latency       729000                       # number of UpgradeReq miss cycles
  system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_misses               14                       # number of UpgradeReq misses
  system.cpu.l2cache.UpgradeReq_mshr_miss_latency       560000                       # number of UpgradeReq MSHR miss cycles
@@ -220,29 +221,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs            0                       #
  system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
  system.cpu.l2cache.demand_accesses                453                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 52168.141593                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 39955.752212                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency 52069.690265                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 39956.858407                       # average overall mshr miss latency
  system.cpu.l2cache.demand_hits                      1                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency       23580000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency       23535500                       # number of demand (read+write) miss cycles
  system.cpu.l2cache.demand_miss_rate          0.997792                       # miss rate for demand accesses
  system.cpu.l2cache.demand_misses                  452                       # number of demand (read+write) misses
  system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency     18060000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency     18060500                       # number of demand (read+write) MSHR miss cycles
  system.cpu.l2cache.demand_mshr_miss_rate     0.997792                       # mshr miss rate for demand accesses
  system.cpu.l2cache.demand_mshr_misses             452                       # number of demand (read+write) MSHR misses
  system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
  system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
  system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
  system.cpu.l2cache.overall_accesses               453                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 52168.141593                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 39955.752212                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency 52069.690265                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 39956.858407                       # average overall mshr miss latency
  system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
  system.cpu.l2cache.overall_hits                     1                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency      23580000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency      23535500                       # number of overall miss cycles
  system.cpu.l2cache.overall_miss_rate         0.997792                       # miss rate for overall accesses
  system.cpu.l2cache.overall_misses                 452                       # number of overall misses
  system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency     18060000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency     18060500                       # number of overall MSHR miss cycles
  system.cpu.l2cache.overall_mshr_miss_rate     0.997792                       # mshr miss rate for overall accesses
  system.cpu.l2cache.overall_mshr_misses            452                       # number of overall MSHR misses
  system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -250,16 +251,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses            0
  system.cpu.l2cache.replacements                     0                       # number of replacements
  system.cpu.l2cache.sampled_refs                   364                       # Sample count of references to valid blocks.
  system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               182.840902                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               181.532273                       # Cycle average of tags in use
  system.cpu.l2cache.total_refs                       1                       # Total number of references to valid blocks.
  system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
  system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            63293                       # number of cpu cycles simulated
+system.cpu.numCycles                            62573                       # number of cpu cycles simulated
+system.cpu.runCycles                            14021                       # Number of cycles cpu stages are processed.
  system.cpu.smtCommittedInsts                        0                       # Number of SMT Instructions Simulated (Per-Thread)
-system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
+system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was in SMT-mode
  system.cpu.smt_cpi                           no_value                       # CPI: Total SMT-CPI
  system.cpu.smt_ipc                           no_value                       # IPC: Total SMT-IPC
-system.cpu.threadCycles                         63293                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
+system.cpu.stage-0.idleCycles                   55279                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-0.runCycles                     7294                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-0.utilization              11.656785                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-1.idleCycles                   55992                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-1.runCycles                     6581                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-1.utilization              10.517316                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-2.idleCycles                   56103                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-2.runCycles                     6470                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-2.utilization              10.339923                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-3.idleCycles                   60520                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-3.runCycles                     2053                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-3.utilization               3.280968                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-4.idleCycles                   56169                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-4.runCycles                     6404                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-4.utilization              10.234446                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.threadCycles                         62573                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
  system.cpu.workload.PROG:num_syscalls              17                       # Number of system calls
  
  ---------- End Simulation Statistics   ----------
diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini

index 78a86bf82a5c9bc0206a290a06d9707f776f58a3..8d2a245081efbb5de4d947dbee7284452e2dbed6 100644 (file)
--- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini
+++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini
@@ -117,6 +117,7 @@ progress_interval=0
  stageTracing=false
  stageWidth=1
  system=system
+threadModel=SMT
  tracer=system.cpu.tracer
  workload=system.cpu.workload
  dcache_port=system.cpu.dcache.cpu_side
diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout

index 581c531f653560f25e36d4b28aef75713e3da8ae..ce217f4942f44eedfe7efa122cb9daca20a4b8ef 100755 (executable)
--- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout
+++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/simout
@@ -5,13 +5,13 @@ The Regents of The University of Michigan
  All Rights Reserved
  
  
-M5 compiled Jan  2 2010 07:01:31
-M5 revision a538feb8a617 6813 default qtip tip qbase fixhelp.patch
-M5 started Jan  2 2010 07:03:09
-M5 executing on fajita
-command line: build/MIPS_SE/m5.opt -d build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/opt/quick/00.hello/mips/linux/inorder-timing
+M5 compiled Jan 31 2010 17:08:14
+M5 revision 01508015f86b 6964 default qtip tip inorder_hello_mips
+M5 started Jan 31 2010 17:08:15
+M5 executing on zooks
+command line: build/MIPS_SE/m5.fast -d build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing -re tests/run.py build/MIPS_SE/tests/fast/quick/00.hello/mips/linux/inorder-timing
  Global frequency set at 1000000000000 ticks per second
  info: Entering event queue @ 0.  Starting simulation...
  info: Increasing stack size by one page.
  Hello World!
-Exiting @ tick 29940500 because target called exit()
+Exiting @ tick 29206500 because target called exit()
diff --git a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt

index d55c721ca6b3a257364f82893540def9575f2064..df2d539f4dd8970bd45c093e98f6a7df40df08a1 100644 (file)
--- a/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt
+++ b/tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt
@@ -1,96 +1,96 @@
  
  ---------- Begin Simulation Statistics ----------
-host_inst_rate                                  10400                       # Simulator instruction rate (inst/s)
-host_mem_usage                                 205896                       # Number of bytes of host memory used
-host_seconds                                     0.56                       # Real time elapsed on the host
-host_tick_rate                               53415864                       # Simulator tick rate (ticks/s)
+host_inst_rate                                  19644                       # Simulator instruction rate (inst/s)
+host_mem_usage                                 155856                       # Number of bytes of host memory used
+host_seconds                                     0.30                       # Real time elapsed on the host
+host_tick_rate                               98307932                       # Simulator tick rate (ticks/s)
  sim_freq                                 1000000000000                       # Frequency of simulated ticks
  sim_insts                                        5827                       # Number of instructions simulated
-sim_seconds                                  0.000030                       # Number of seconds simulated
-sim_ticks                                    29940500                       # Number of ticks simulated
+sim_seconds                                  0.000029                       # Number of seconds simulated
+sim_ticks                                    29206500                       # Number of ticks simulated
  system.cpu.AGEN-Unit.instReqsProcessed           2090                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Branch-Predictor.instReqsProcessed         5828                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Branch-Predictor.predictedNotTaken          826                       # Number of Branches Predicted As Not Taken (False).
  system.cpu.Branch-Predictor.predictedTaken           90                       # Number of Branches Predicted As Taken (True).
  system.cpu.Decode-Unit.instReqsProcessed         5828                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.cyclesExecuted         3725                       # Number of Cycles Execution Unit was used.
  system.cpu.Execution-Unit.instReqsProcessed         3734                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Execution-Unit.predictedNotTakenIncorrect          541                       # Number of Branches Incorrectly Predicted As Not Taken).
  system.cpu.Execution-Unit.predictedTakenIncorrect           35                       # Number of Branches Incorrectly Predicted As Taken.
-system.cpu.Fetch-Buffer-T0.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T0.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Buffer-T1.instReqsProcessed            0                       # Number of Instructions Requests that completed in this resource.
-system.cpu.Fetch-Buffer-T1.instsBypassed            0                       # Number of Instructions Bypassed.
-system.cpu.Fetch-Seq-Unit.instReqsProcessed        11657                       # Number of Instructions Requests that completed in this resource.
+system.cpu.Execution-Unit.utilization        0.063769                       # Utilization of Execution Unit (cycles / totalCycles).
+system.cpu.Fetch-Seq-Unit.instReqsProcessed        11702                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Graduation-Unit.instReqsProcessed         5827                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.divInstReqsProcessed            1                       # Number of Divide Requests Processed.
  system.cpu.Mult-Div-Unit.instReqsProcessed            8                       # Number of Instructions Requests that completed in this resource.
  system.cpu.Mult-Div-Unit.multInstReqsProcessed            3                       # Number of Multiply Requests Processed.
  system.cpu.RegFile-Manager.instReqsProcessed        10713                       # Number of Instructions Requests that completed in this resource.
+system.cpu.activity                         20.277673                       # Percentage of cycles cpu is active
  system.cpu.committedInsts                        5827                       # Number of Instructions Simulated (Per-Thread)
  system.cpu.committedInsts_total                  5827                       # Number of Instructions Simulated (Total)
-system.cpu.cpi                              10.276643                       # CPI: Cycles Per Instruction (Per-Thread)
-system.cpu.cpi_total                        10.276643                       # CPI: Total CPI of All Threads
-system.cpu.dcache.ReadReq_accesses               1165                       # number of ReadReq accesses(hits+misses)
-system.cpu.dcache.ReadReq_avg_miss_latency 56201.149425                       # average ReadReq miss latency
-system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53201.149425                       # average ReadReq mshr miss latency
-system.cpu.dcache.ReadReq_hits                   1078                       # number of ReadReq hits
-system.cpu.dcache.ReadReq_miss_latency        4889500                       # number of ReadReq miss cycles
-system.cpu.dcache.ReadReq_miss_rate          0.074678                       # miss rate for ReadReq accesses
+system.cpu.contextSwitches                          1                       # Number of context switches
+system.cpu.cpi                              10.024713                       # CPI: Cycles Per Instruction (Per-Thread)
+system.cpu.cpi_total                        10.024713                       # CPI: Total CPI of All Threads
+system.cpu.dcache.ReadReq_accesses               1164                       # number of ReadReq accesses(hits+misses)
+system.cpu.dcache.ReadReq_avg_miss_latency 56229.885057                       # average ReadReq miss latency
+system.cpu.dcache.ReadReq_avg_mshr_miss_latency 53229.885057                       # average ReadReq mshr miss latency
+system.cpu.dcache.ReadReq_hits                   1077                       # number of ReadReq hits
+system.cpu.dcache.ReadReq_miss_latency        4892000                       # number of ReadReq miss cycles
+system.cpu.dcache.ReadReq_miss_rate          0.074742                       # miss rate for ReadReq accesses
  system.cpu.dcache.ReadReq_misses                   87                       # number of ReadReq misses
-system.cpu.dcache.ReadReq_mshr_miss_latency      4628500                       # number of ReadReq MSHR miss cycles
-system.cpu.dcache.ReadReq_mshr_miss_rate     0.074678                       # mshr miss rate for ReadReq accesses
+system.cpu.dcache.ReadReq_mshr_miss_latency      4631000                       # number of ReadReq MSHR miss cycles
+system.cpu.dcache.ReadReq_mshr_miss_rate     0.074742                       # mshr miss rate for ReadReq accesses
  system.cpu.dcache.ReadReq_mshr_misses              87                       # number of ReadReq MSHR misses
  system.cpu.dcache.WriteReq_accesses               925                       # number of WriteReq accesses(hits+misses)
-system.cpu.dcache.WriteReq_avg_miss_latency 56554.687500                       # average WriteReq miss latency
-system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53554.687500                       # average WriteReq mshr miss latency
+system.cpu.dcache.WriteReq_avg_miss_latency 56265.625000                       # average WriteReq miss latency
+system.cpu.dcache.WriteReq_avg_mshr_miss_latency 53265.625000                       # average WriteReq mshr miss latency
  system.cpu.dcache.WriteReq_hits                   861                       # number of WriteReq hits
-system.cpu.dcache.WriteReq_miss_latency       3619500                       # number of WriteReq miss cycles
+system.cpu.dcache.WriteReq_miss_latency       3601000                       # number of WriteReq miss cycles
  system.cpu.dcache.WriteReq_miss_rate         0.069189                       # miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_misses                  64                       # number of WriteReq misses
-system.cpu.dcache.WriteReq_mshr_miss_latency      3427500                       # number of WriteReq MSHR miss cycles
+system.cpu.dcache.WriteReq_mshr_miss_latency      3409000                       # number of WriteReq MSHR miss cycles
  system.cpu.dcache.WriteReq_mshr_miss_rate     0.069189                       # mshr miss rate for WriteReq accesses
  system.cpu.dcache.WriteReq_mshr_misses             64                       # number of WriteReq MSHR misses
  system.cpu.dcache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.dcache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.dcache.avg_refs                  14.144928                       # Average number of references to valid blocks.
+system.cpu.dcache.avg_refs                  14.137681                       # Average number of references to valid blocks.
  system.cpu.dcache.blocked::no_mshrs                 0                       # number of cycles access was blocked
  system.cpu.dcache.blocked::no_targets               0                       # number of cycles access was blocked
  system.cpu.dcache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.dcache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.dcache.cache_copies                      0                       # number of cache copies performed
-system.cpu.dcache.demand_accesses                2090                       # number of demand (read+write) accesses
-system.cpu.dcache.demand_avg_miss_latency 56350.993377                       # average overall miss latency
-system.cpu.dcache.demand_avg_mshr_miss_latency 53350.993377                       # average overall mshr miss latency
-system.cpu.dcache.demand_hits                    1939                       # number of demand (read+write) hits
-system.cpu.dcache.demand_miss_latency         8509000                       # number of demand (read+write) miss cycles
-system.cpu.dcache.demand_miss_rate           0.072249                       # miss rate for demand accesses
+system.cpu.dcache.demand_accesses                2089                       # number of demand (read+write) accesses
+system.cpu.dcache.demand_avg_miss_latency 56245.033113                       # average overall miss latency
+system.cpu.dcache.demand_avg_mshr_miss_latency 53245.033113                       # average overall mshr miss latency
+system.cpu.dcache.demand_hits                    1938                       # number of demand (read+write) hits
+system.cpu.dcache.demand_miss_latency         8493000                       # number of demand (read+write) miss cycles
+system.cpu.dcache.demand_miss_rate           0.072283                       # miss rate for demand accesses
  system.cpu.dcache.demand_misses                   151                       # number of demand (read+write) misses
  system.cpu.dcache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.dcache.demand_mshr_miss_latency      8056000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.dcache.demand_mshr_miss_rate      0.072249                       # mshr miss rate for demand accesses
+system.cpu.dcache.demand_mshr_miss_latency      8040000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.dcache.demand_mshr_miss_rate      0.072283                       # mshr miss rate for demand accesses
  system.cpu.dcache.demand_mshr_misses              151                       # number of demand (read+write) MSHR misses
  system.cpu.dcache.fast_writes                       0                       # number of fast writes performed
  system.cpu.dcache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.dcache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.dcache.overall_accesses               2090                       # number of overall (read+write) accesses
-system.cpu.dcache.overall_avg_miss_latency 56350.993377                       # average overall miss latency
-system.cpu.dcache.overall_avg_mshr_miss_latency 53350.993377                       # average overall mshr miss latency
+system.cpu.dcache.overall_accesses               2089                       # number of overall (read+write) accesses
+system.cpu.dcache.overall_avg_miss_latency 56245.033113                       # average overall miss latency
+system.cpu.dcache.overall_avg_mshr_miss_latency 53245.033113                       # average overall mshr miss latency
  system.cpu.dcache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.dcache.overall_hits                   1939                       # number of overall hits
-system.cpu.dcache.overall_miss_latency        8509000                       # number of overall miss cycles
-system.cpu.dcache.overall_miss_rate          0.072249                       # miss rate for overall accesses
+system.cpu.dcache.overall_hits                   1938                       # number of overall hits
+system.cpu.dcache.overall_miss_latency        8493000                       # number of overall miss cycles
+system.cpu.dcache.overall_miss_rate          0.072283                       # miss rate for overall accesses
  system.cpu.dcache.overall_misses                  151                       # number of overall misses
  system.cpu.dcache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.dcache.overall_mshr_miss_latency      8056000                       # number of overall MSHR miss cycles
-system.cpu.dcache.overall_mshr_miss_rate     0.072249                       # mshr miss rate for overall accesses
+system.cpu.dcache.overall_mshr_miss_latency      8040000                       # number of overall MSHR miss cycles
+system.cpu.dcache.overall_mshr_miss_rate     0.072283                       # mshr miss rate for overall accesses
  system.cpu.dcache.overall_mshr_misses             151                       # number of overall MSHR misses
  system.cpu.dcache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
  system.cpu.dcache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
  system.cpu.dcache.replacements                      0                       # number of replacements
  system.cpu.dcache.sampled_refs                    138                       # Sample count of references to valid blocks.
  system.cpu.dcache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.dcache.tagsinuse                 88.212490                       # Cycle average of tags in use
-system.cpu.dcache.total_refs                     1952                       # Total number of references to valid blocks.
+system.cpu.dcache.tagsinuse                 88.491296                       # Cycle average of tags in use
+system.cpu.dcache.total_refs                     1951                       # Total number of references to valid blocks.
  system.cpu.dcache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.dcache.writebacks                        0                       # number of writebacks
  system.cpu.dcache_port.instReqsProcessed         2089                       # Number of Instructions Requests that completed in this resource.
@@ -103,62 +103,63 @@ system.cpu.dtb.read_misses                          0                       # DT
  system.cpu.dtb.write_accesses                       0                       # DTB write accesses
  system.cpu.dtb.write_hits                           0                       # DTB write hits
  system.cpu.dtb.write_misses                         0                       # DTB write misses
-system.cpu.icache.ReadReq_accesses               5829                       # number of ReadReq accesses(hits+misses)
-system.cpu.icache.ReadReq_avg_miss_latency 55765.676568                       # average ReadReq miss latency
-system.cpu.icache.ReadReq_avg_mshr_miss_latency 52765.676568                       # average ReadReq mshr miss latency
-system.cpu.icache.ReadReq_hits                   5526                       # number of ReadReq hits
-system.cpu.icache.ReadReq_miss_latency       16897000                       # number of ReadReq miss cycles
-system.cpu.icache.ReadReq_miss_rate          0.051981                       # miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_accesses               5874                       # number of ReadReq accesses(hits+misses)
+system.cpu.icache.ReadReq_avg_miss_latency 55801.980198                       # average ReadReq miss latency
+system.cpu.icache.ReadReq_avg_mshr_miss_latency 52801.980198                       # average ReadReq mshr miss latency
+system.cpu.icache.ReadReq_hits                   5571                       # number of ReadReq hits
+system.cpu.icache.ReadReq_miss_latency       16908000                       # number of ReadReq miss cycles
+system.cpu.icache.ReadReq_miss_rate          0.051583                       # miss rate for ReadReq accesses
  system.cpu.icache.ReadReq_misses                  303                       # number of ReadReq misses
-system.cpu.icache.ReadReq_mshr_miss_latency     15988000                       # number of ReadReq MSHR miss cycles
-system.cpu.icache.ReadReq_mshr_miss_rate     0.051981                       # mshr miss rate for ReadReq accesses
+system.cpu.icache.ReadReq_mshr_miss_latency     15999000                       # number of ReadReq MSHR miss cycles
+system.cpu.icache.ReadReq_mshr_miss_rate     0.051583                       # mshr miss rate for ReadReq accesses
  system.cpu.icache.ReadReq_mshr_misses             303                       # number of ReadReq MSHR misses
  system.cpu.icache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
  system.cpu.icache.avg_blocked_cycles::no_targets     no_value                       # average number of cycles each access was blocked
-system.cpu.icache.avg_refs                  18.237624                       # Average number of references to valid blocks.
+system.cpu.icache.avg_refs                  18.386139                       # Average number of references to valid blocks.
  system.cpu.icache.blocked::no_mshrs                 0                       # number of cycles access was blocked
  system.cpu.icache.blocked::no_targets               0                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_mshrs            0                       # number of cycles access was blocked
  system.cpu.icache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.icache.cache_copies                      0                       # number of cache copies performed
-system.cpu.icache.demand_accesses                5829                       # number of demand (read+write) accesses
-system.cpu.icache.demand_avg_miss_latency 55765.676568                       # average overall miss latency
-system.cpu.icache.demand_avg_mshr_miss_latency 52765.676568                       # average overall mshr miss latency
-system.cpu.icache.demand_hits                    5526                       # number of demand (read+write) hits
-system.cpu.icache.demand_miss_latency        16897000                       # number of demand (read+write) miss cycles
-system.cpu.icache.demand_miss_rate           0.051981                       # miss rate for demand accesses
+system.cpu.icache.demand_accesses                5874                       # number of demand (read+write) accesses
+system.cpu.icache.demand_avg_miss_latency 55801.980198                       # average overall miss latency
+system.cpu.icache.demand_avg_mshr_miss_latency 52801.980198                       # average overall mshr miss latency
+system.cpu.icache.demand_hits                    5571                       # number of demand (read+write) hits
+system.cpu.icache.demand_miss_latency        16908000                       # number of demand (read+write) miss cycles
+system.cpu.icache.demand_miss_rate           0.051583                       # miss rate for demand accesses
  system.cpu.icache.demand_misses                   303                       # number of demand (read+write) misses
  system.cpu.icache.demand_mshr_hits                  0                       # number of demand (read+write) MSHR hits
-system.cpu.icache.demand_mshr_miss_latency     15988000                       # number of demand (read+write) MSHR miss cycles
-system.cpu.icache.demand_mshr_miss_rate      0.051981                       # mshr miss rate for demand accesses
+system.cpu.icache.demand_mshr_miss_latency     15999000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.icache.demand_mshr_miss_rate      0.051583                       # mshr miss rate for demand accesses
  system.cpu.icache.demand_mshr_misses              303                       # number of demand (read+write) MSHR misses
  system.cpu.icache.fast_writes                       0                       # number of fast writes performed
  system.cpu.icache.mshr_cap_events                   0                       # number of times MSHR cap was activated
  system.cpu.icache.no_allocate_misses                0                       # Number of misses that were no-allocate
-system.cpu.icache.overall_accesses               5829                       # number of overall (read+write) accesses
-system.cpu.icache.overall_avg_miss_latency 55765.676568                       # average overall miss latency
-system.cpu.icache.overall_avg_mshr_miss_latency 52765.676568                       # average overall mshr miss latency
+system.cpu.icache.overall_accesses               5874                       # number of overall (read+write) accesses
+system.cpu.icache.overall_avg_miss_latency 55801.980198                       # average overall miss latency
+system.cpu.icache.overall_avg_mshr_miss_latency 52801.980198                       # average overall mshr miss latency
  system.cpu.icache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
-system.cpu.icache.overall_hits                   5526                       # number of overall hits
-system.cpu.icache.overall_miss_latency       16897000                       # number of overall miss cycles
-system.cpu.icache.overall_miss_rate          0.051981                       # miss rate for overall accesses
+system.cpu.icache.overall_hits                   5571                       # number of overall hits
+system.cpu.icache.overall_miss_latency       16908000                       # number of overall miss cycles
+system.cpu.icache.overall_miss_rate          0.051583                       # miss rate for overall accesses
  system.cpu.icache.overall_misses                  303                       # number of overall misses
  system.cpu.icache.overall_mshr_hits                 0                       # number of overall MSHR hits
-system.cpu.icache.overall_mshr_miss_latency     15988000                       # number of overall MSHR miss cycles
-system.cpu.icache.overall_mshr_miss_rate     0.051981                       # mshr miss rate for overall accesses
+system.cpu.icache.overall_mshr_miss_latency     15999000                       # number of overall MSHR miss cycles
+system.cpu.icache.overall_mshr_miss_rate     0.051583                       # mshr miss rate for overall accesses
  system.cpu.icache.overall_mshr_misses             303                       # number of overall MSHR misses
  system.cpu.icache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
  system.cpu.icache.overall_mshr_uncacheable_misses            0                       # number of overall MSHR uncacheable misses
  system.cpu.icache.replacements                     13                       # number of replacements
  system.cpu.icache.sampled_refs                    303                       # Sample count of references to valid blocks.
  system.cpu.icache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.icache.tagsinuse                134.267603                       # Cycle average of tags in use
-system.cpu.icache.total_refs                     5526                       # Total number of references to valid blocks.
+system.cpu.icache.tagsinuse                135.362853                       # Cycle average of tags in use
+system.cpu.icache.total_refs                     5571                       # Total number of references to valid blocks.
  system.cpu.icache.warmup_cycle                      0                       # Cycle when the warmup percentage was hit.
  system.cpu.icache.writebacks                        0                       # number of writebacks
-system.cpu.icache_port.instReqsProcessed         5828                       # Number of Instructions Requests that completed in this resource.
-system.cpu.ipc                               0.097308                       # IPC: Instructions Per Cycle (Per-Thread)
-system.cpu.ipc_total                         0.097308                       # IPC: Total IPC of All Threads
+system.cpu.icache_port.instReqsProcessed         5873                       # Number of Instructions Requests that completed in this resource.
+system.cpu.idleCycles                           46569                       # Number of cycles cpu's stages were not processed
+system.cpu.ipc                               0.099753                       # IPC: Instructions Per Cycle (Per-Thread)
+system.cpu.ipc_total                         0.099753                       # IPC: Total IPC of All Threads
  system.cpu.itb.accesses                             0                       # DTB accesses
  system.cpu.itb.hits                                 0                       # DTB hits
  system.cpu.itb.misses                               0                       # DTB misses
@@ -169,31 +170,31 @@ system.cpu.itb.write_accesses                       0                       # DT
  system.cpu.itb.write_hits                           0                       # DTB write hits
  system.cpu.itb.write_misses                         0                       # DTB write misses
  system.cpu.l2cache.ReadExReq_accesses              51                       # number of ReadExReq accesses(hits+misses)
-system.cpu.l2cache.ReadExReq_avg_miss_latency        52500                       # average ReadExReq miss latency
+system.cpu.l2cache.ReadExReq_avg_miss_latency 52264.705882                       # average ReadExReq miss latency
  system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40098.039216                       # average ReadExReq mshr miss latency
-system.cpu.l2cache.ReadExReq_miss_latency      2677500                       # number of ReadExReq miss cycles
+system.cpu.l2cache.ReadExReq_miss_latency      2665500                       # number of ReadExReq miss cycles
  system.cpu.l2cache.ReadExReq_miss_rate              1                       # miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_misses                51                       # number of ReadExReq misses
  system.cpu.l2cache.ReadExReq_mshr_miss_latency      2045000                       # number of ReadExReq MSHR miss cycles
  system.cpu.l2cache.ReadExReq_mshr_miss_rate            1                       # mshr miss rate for ReadExReq accesses
  system.cpu.l2cache.ReadExReq_mshr_misses           51                       # number of ReadExReq MSHR misses
  system.cpu.l2cache.ReadReq_accesses               390                       # number of ReadReq accesses(hits+misses)
-system.cpu.l2cache.ReadReq_avg_miss_latency 52052.835052                       # average ReadReq miss latency
-system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40023.195876                       # average ReadReq mshr miss latency
+system.cpu.l2cache.ReadReq_avg_miss_latency 52091.494845                       # average ReadReq miss latency
+system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40048.969072                       # average ReadReq mshr miss latency
  system.cpu.l2cache.ReadReq_hits                     2                       # number of ReadReq hits
-system.cpu.l2cache.ReadReq_miss_latency      20196500                       # number of ReadReq miss cycles
+system.cpu.l2cache.ReadReq_miss_latency      20211500                       # number of ReadReq miss cycles
  system.cpu.l2cache.ReadReq_miss_rate         0.994872                       # miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_misses                 388                       # number of ReadReq misses
-system.cpu.l2cache.ReadReq_mshr_miss_latency     15529000                       # number of ReadReq MSHR miss cycles
+system.cpu.l2cache.ReadReq_mshr_miss_latency     15539000                       # number of ReadReq MSHR miss cycles
  system.cpu.l2cache.ReadReq_mshr_miss_rate     0.994872                       # mshr miss rate for ReadReq accesses
  system.cpu.l2cache.ReadReq_mshr_misses            388                       # number of ReadReq MSHR misses
  system.cpu.l2cache.UpgradeReq_accesses             13                       # number of UpgradeReq accesses(hits+misses)
-system.cpu.l2cache.UpgradeReq_avg_miss_latency 52538.461538                       # average UpgradeReq miss latency
-system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40076.923077                       # average UpgradeReq mshr miss latency
-system.cpu.l2cache.UpgradeReq_miss_latency       683000                       # number of UpgradeReq miss cycles
+system.cpu.l2cache.UpgradeReq_avg_miss_latency 52192.307692                       # average UpgradeReq miss latency
+system.cpu.l2cache.UpgradeReq_avg_mshr_miss_latency 40153.846154                       # average UpgradeReq mshr miss latency
+system.cpu.l2cache.UpgradeReq_miss_latency       678500                       # number of UpgradeReq miss cycles
  system.cpu.l2cache.UpgradeReq_miss_rate             1                       # miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_misses               13                       # number of UpgradeReq misses
-system.cpu.l2cache.UpgradeReq_mshr_miss_latency       521000                       # number of UpgradeReq MSHR miss cycles
+system.cpu.l2cache.UpgradeReq_mshr_miss_latency       522000                       # number of UpgradeReq MSHR miss cycles
  system.cpu.l2cache.UpgradeReq_mshr_miss_rate            1                       # mshr miss rate for UpgradeReq accesses
  system.cpu.l2cache.UpgradeReq_mshr_misses           13                       # number of UpgradeReq MSHR misses
  system.cpu.l2cache.avg_blocked_cycles::no_mshrs     no_value                       # average number of cycles each access was blocked
@@ -205,29 +206,29 @@ system.cpu.l2cache.blocked_cycles::no_mshrs            0                       #
  system.cpu.l2cache.blocked_cycles::no_targets            0                       # number of cycles access was blocked
  system.cpu.l2cache.cache_copies                     0                       # number of cache copies performed
  system.cpu.l2cache.demand_accesses                441                       # number of demand (read+write) accesses
-system.cpu.l2cache.demand_avg_miss_latency 52104.783599                       # average overall miss latency
-system.cpu.l2cache.demand_avg_mshr_miss_latency 40031.890661                       # average overall mshr miss latency
+system.cpu.l2cache.demand_avg_miss_latency 52111.617312                       # average overall miss latency
+system.cpu.l2cache.demand_avg_mshr_miss_latency 40054.669704                       # average overall mshr miss latency
  system.cpu.l2cache.demand_hits                      2                       # number of demand (read+write) hits
-system.cpu.l2cache.demand_miss_latency       22874000                       # number of demand (read+write) miss cycles
+system.cpu.l2cache.demand_miss_latency       22877000                       # number of demand (read+write) miss cycles
  system.cpu.l2cache.demand_miss_rate          0.995465                       # miss rate for demand accesses
  system.cpu.l2cache.demand_misses                  439                       # number of demand (read+write) misses
  system.cpu.l2cache.demand_mshr_hits                 0                       # number of demand (read+write) MSHR hits
-system.cpu.l2cache.demand_mshr_miss_latency     17574000                       # number of demand (read+write) MSHR miss cycles
+system.cpu.l2cache.demand_mshr_miss_latency     17584000                       # number of demand (read+write) MSHR miss cycles
  system.cpu.l2cache.demand_mshr_miss_rate     0.995465                       # mshr miss rate for demand accesses
  system.cpu.l2cache.demand_mshr_misses             439                       # number of demand (read+write) MSHR misses
  system.cpu.l2cache.fast_writes                      0                       # number of fast writes performed
  system.cpu.l2cache.mshr_cap_events                  0                       # number of times MSHR cap was activated
  system.cpu.l2cache.no_allocate_misses               0                       # Number of misses that were no-allocate
  system.cpu.l2cache.overall_accesses               441                       # number of overall (read+write) accesses
-system.cpu.l2cache.overall_avg_miss_latency 52104.783599                       # average overall miss latency
-system.cpu.l2cache.overall_avg_mshr_miss_latency 40031.890661                       # average overall mshr miss latency
+system.cpu.l2cache.overall_avg_miss_latency 52111.617312                       # average overall miss latency
+system.cpu.l2cache.overall_avg_mshr_miss_latency 40054.669704                       # average overall mshr miss latency
  system.cpu.l2cache.overall_avg_mshr_uncacheable_latency     no_value                       # average overall mshr uncacheable latency
  system.cpu.l2cache.overall_hits                     2                       # number of overall hits
-system.cpu.l2cache.overall_miss_latency      22874000                       # number of overall miss cycles
+system.cpu.l2cache.overall_miss_latency      22877000                       # number of overall miss cycles
  system.cpu.l2cache.overall_miss_rate         0.995465                       # miss rate for overall accesses
  system.cpu.l2cache.overall_misses                 439                       # number of overall misses
  system.cpu.l2cache.overall_mshr_hits                0                       # number of overall MSHR hits
-system.cpu.l2cache.overall_mshr_miss_latency     17574000                       # number of overall MSHR miss cycles
+system.cpu.l2cache.overall_mshr_miss_latency     17584000                       # number of overall MSHR miss cycles
  system.cpu.l2cache.overall_mshr_miss_rate     0.995465                       # mshr miss rate for overall accesses
  system.cpu.l2cache.overall_mshr_misses            439                       # number of overall MSHR misses
  system.cpu.l2cache.overall_mshr_uncacheable_latency            0                       # number of overall MSHR uncacheable cycles
@@ -235,16 +236,32 @@ system.cpu.l2cache.overall_mshr_uncacheable_misses            0
  system.cpu.l2cache.replacements                     0                       # number of replacements
  system.cpu.l2cache.sampled_refs                   375                       # Sample count of references to valid blocks.
  system.cpu.l2cache.soft_prefetch_mshr_full            0                       # number of mshr full events for SW prefetching instrutions
-system.cpu.l2cache.tagsinuse               185.807591                       # Cycle average of tags in use
+system.cpu.l2cache.tagsinuse               187.032260                       # Cycle average of tags in use
  system.cpu.l2cache.total_refs                       2                       # Total number of references to valid blocks.
  system.cpu.l2cache.warmup_cycle                     0                       # Cycle when the warmup percentage was hit.
  system.cpu.l2cache.writebacks                       0                       # number of writebacks
-system.cpu.numCycles                            59882                       # number of cpu cycles simulated
+system.cpu.numCycles                            58414                       # number of cpu cycles simulated
+system.cpu.runCycles                            11845                       # Number of cycles cpu stages are processed.
  system.cpu.smtCommittedInsts                        0                       # Number of SMT Instructions Simulated (Per-Thread)
-system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was simultaneous multithreading.(SMT)
+system.cpu.smtCycles                                0                       # Total number of cycles that the CPU was in SMT-mode
  system.cpu.smt_cpi                           no_value                       # CPI: Total SMT-CPI
  system.cpu.smt_ipc                           no_value                       # IPC: Total SMT-IPC
-system.cpu.threadCycles                         59882                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
+system.cpu.stage-0.idleCycles                   52540                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-0.runCycles                     5874                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-0.utilization              10.055809                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-1.idleCycles                   52586                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-1.runCycles                     5828                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-1.utilization               9.977060                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-2.idleCycles                   52582                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-2.runCycles                     5832                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-2.utilization               9.983908                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-3.idleCycles                   56324                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-3.runCycles                     2090                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-3.utilization               3.577909                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.stage-4.idleCycles                   52587                       # Number of cycles 0 instructions are processed.
+system.cpu.stage-4.runCycles                     5827                       # Number of cycles 1+ instructions are processed.
+system.cpu.stage-4.utilization               9.975348                       # Percentage of cycles stage was utilized (processing insts).
+system.cpu.threadCycles                         58414                       # Total Number of Cycles A Thread Was Active in CPU (Per-Thread)
  system.cpu.workload.PROG:num_syscalls               8                       # Number of system calls
  
  ---------- End Simulation Statistics   ----------
author	Brad Beckmann <Brad.Beckmann@amd.com>
	Mon, 1 Feb 2010 06:28:13 +0000 (22:28 -0800)
committer	Brad Beckmann <Brad.Beckmann@amd.com>
	Mon, 1 Feb 2010 06:28:13 +0000 (22:28 -0800)
src/cpu/inorder/InOrderCPU.py		patch \| blob \| history
src/cpu/inorder/SConscript		patch \| blob \| history
src/cpu/inorder/cpu.cc		patch \| blob \| history
src/cpu/inorder/cpu.hh		patch \| blob \| history
src/cpu/inorder/first_stage.cc		patch \| blob \| history
src/cpu/inorder/first_stage.hh		patch \| blob \| history
src/cpu/inorder/inorder_dyn_inst.cc		patch \| blob \| history
src/cpu/inorder/inorder_dyn_inst.hh		patch \| blob \| history
src/cpu/inorder/pipeline_stage.cc		patch \| blob \| history
src/cpu/inorder/pipeline_stage.hh		patch \| blob \| history
src/cpu/inorder/pipeline_traits.cc		patch \| blob \| history
src/cpu/inorder/pipeline_traits.hh		patch \| blob \| history
src/cpu/inorder/reg_dep_map.cc		patch \| blob \| history
src/cpu/inorder/reg_dep_map.hh		patch \| blob \| history
src/cpu/inorder/resource.cc		patch \| blob \| history
src/cpu/inorder/resource.hh		patch \| blob \| history
src/cpu/inorder/resource_pool.cc		patch \| blob \| history
src/cpu/inorder/resource_pool.hh		patch \| blob \| history
src/cpu/inorder/resources/cache_unit.cc		patch \| blob \| history
src/cpu/inorder/resources/cache_unit.hh		patch \| blob \| history
src/cpu/inorder/resources/execution_unit.cc		patch \| blob \| history
src/cpu/inorder/resources/execution_unit.hh		patch \| blob \| history
src/cpu/inorder/resources/fetch_seq_unit.cc		patch \| blob \| history
src/cpu/inorder/resources/fetch_seq_unit.hh		patch \| blob \| history
src/cpu/inorder/resources/graduation_unit.cc		patch \| blob \| history
src/cpu/inorder/resources/graduation_unit.hh		patch \| blob \| history
src/cpu/inorder/resources/inst_buffer.cc		patch \| blob \| history
src/cpu/inorder/resources/mult_div_unit.hh		patch \| blob \| history
src/cpu/inorder/resources/use_def.cc		patch \| blob \| history
src/cpu/inorder/resources/use_def.hh		patch \| blob \| history
src/cpu/inorder/thread_context.cc		patch \| blob \| history
src/cpu/inorder/thread_context.hh		patch \| blob \| history
src/cpu/inorder/thread_state.hh		patch \| blob \| history
tests/long/50.vortex/ref/alpha/tru64/inorder-timing/config.ini		patch \| blob \| history
tests/long/50.vortex/ref/alpha/tru64/inorder-timing/simout		patch \| blob \| history
tests/long/50.vortex/ref/alpha/tru64/inorder-timing/stats.txt		patch \| blob \| history
tests/long/70.twolf/ref/alpha/tru64/inorder-timing/config.ini		patch \| blob \| history
tests/long/70.twolf/ref/alpha/tru64/inorder-timing/simout		patch \| blob \| history
tests/long/70.twolf/ref/alpha/tru64/inorder-timing/stats.txt		patch \| blob \| history
tests/quick/00.hello/ref/alpha/linux/inorder-timing/config.ini		patch \| blob \| history
tests/quick/00.hello/ref/alpha/linux/inorder-timing/simout		patch \| blob \| history
tests/quick/00.hello/ref/alpha/linux/inorder-timing/stats.txt		patch \| blob \| history
tests/quick/00.hello/ref/mips/linux/inorder-timing/config.ini		patch \| blob \| history
tests/quick/00.hello/ref/mips/linux/inorder-timing/simout		patch \| blob \| history
tests/quick/00.hello/ref/mips/linux/inorder-timing/stats.txt		patch \| blob \| history