cpu: add a condition-code register class

[gem5.git] / src / cpu / o3 / inst_queue_impl.hh
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh

index bdf5f07aa7eeb1bbcc7f626409859fbea54219f9..1c86b7c89ee88993732d5aa3a85f50079204ed37 100644 (file)
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1,4 +1,17 @@
  /*
+ * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
   * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
@@ -32,19 +45,23 @@
  #include <limits>
  #include <vector>
  
-#include "sim/core.hh"
-
  #include "cpu/o3/fu_pool.hh"
  #include "cpu/o3/inst_queue.hh"
+#include "debug/IQ.hh"
+#include "enums/OpClass.hh"
+#include "params/DerivO3CPU.hh"
+#include "sim/core.hh"
+
+// clang complains about std::set being overloaded with Packet::set if
+// we open up the entire namespace std
+using std::list;
  
  template <class Impl>
  InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
-                                                   int fu_idx,
-                                                   InstructionQueue<Impl> *iq_ptr)
-    : Event(&mainEventQueue, Stat_Event_Pri),
+    int fu_idx, InstructionQueue<Impl> *iq_ptr)
+    : Event(Stat_Event_Pri, AutoDelete),
        inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
  {
-    this->setFlags(Event::AutoDelete);
  }
  
  template <class Impl>
@@ -58,31 +75,28 @@ InstructionQueue<Impl>::FUCompletion::process()
  
  template <class Impl>
  const char *
-InstructionQueue<Impl>::FUCompletion::description()
+InstructionQueue<Impl>::FUCompletion::description() const
  {
-    return "Functional unit completion event";
+    return "Functional unit completion";
  }
  
  template <class Impl>
  InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
-                                         Params *params)
+                                         DerivO3CPUParams *params)
      : cpu(cpu_ptr),
        iewStage(iew_ptr),
        fuPool(params->fuPool),
        numEntries(params->numIQEntries),
        totalWidth(params->issueWidth),
-      numPhysIntRegs(params->numPhysIntRegs),
-      numPhysFloatRegs(params->numPhysFloatRegs),
        commitToIEWDelay(params->commitToIEWDelay)
  {
      assert(fuPool);
  
-    switchedOut = false;
-
-    numThreads = params->numberOfThreads;
+    numThreads = params->numThreads;
  
-    // Set the number of physical registers as the number of int + float
-    numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+    // Set the total number of physical registers (int + float + CC)
+    numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
+        params->numPhysCCRegs;
  
      //Create an entry for each physical register within the
      //dependency graph.
@@ -92,9 +106,9 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
      regScoreboard.resize(numPhysRegs);
  
      //Initialize Mem Dependence Units
-    for (int i = 0; i < numThreads; i++) {
-        memDepUnit[i].init(params,i);
-        memDepUnit[i].setIQ(this);
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        memDepUnit[tid].init(params, tid);
+        memDepUnit[tid].setIQ(this);
      }
  
      resetState();
@@ -110,8 +124,8 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
          iqPolicy = Dynamic;
  
          //Set Max Entries to Total ROB Capacity
-        for (int i = 0; i < numThreads; i++) {
-            maxEntries[i] = numEntries;
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            maxEntries[tid] = numEntries;
          }
  
      } else if (policy == "partitioned") {
@@ -121,8 +135,8 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
          int part_amt = numEntries / numThreads;
  
          //Divide ROB up evenly
-        for (int i = 0; i < numThreads; i++) {
-            maxEntries[i] = part_amt;
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            maxEntries[tid] = part_amt;
          }
  
          DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
@@ -135,8 +149,8 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
          int thresholdIQ = (int)((double)threshold * numEntries);
  
          //Divide up by threshold amount
-        for (int i = 0; i < numThreads; i++) {
-            maxEntries[i] = thresholdIQ;
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            maxEntries[tid] = thresholdIQ;
          }
  
          DPRINTF(IQ, "IQ sharing policy set to Threshold:"
@@ -243,14 +257,14 @@ InstructionQueue<Impl>::regStats()
  */
      numIssuedDist
          .init(0,totalWidth,1)
-        .name(name() + ".ISSUE:issued_per_cycle")
+        .name(name() + ".issued_per_cycle")
          .desc("Number of insts issued each cycle")
          .flags(pdf)
          ;
  /*
      dist_unissued
          .init(Num_OpClasses+2)
-        .name(name() + ".ISSUE:unissued_cause")
+        .name(name() + ".unissued_cause")
          .desc("Reason ready instruction not issued")
          .flags(pdf | dist)
          ;
@@ -259,12 +273,12 @@ InstructionQueue<Impl>::regStats()
      }
  */
      statIssuedInstType
-        .init(numThreads,Num_OpClasses)
-        .name(name() + ".ISSUE:FU_type")
+        .init(numThreads,Enums::Num_OpClass)
+        .name(name() + ".FU_type")
          .desc("Type of FU issued")
          .flags(total | pdf | dist)
          ;
-    statIssuedInstType.ysubnames(opClassStrings);
+    statIssuedInstType.ysubnames(Enums::OpClassStrings);
  
      //
      //  How long did instructions for a particular FU type wait prior to issue
@@ -272,7 +286,7 @@ InstructionQueue<Impl>::regStats()
  /*
      issueDelayDist
          .init(Num_OpClasses,0,99,2)
-        .name(name() + ".ISSUE:")
+        .name(name() + ".")
          .desc("cycles from operands ready to issue")
          .flags(pdf | cdf)
          ;
@@ -284,7 +298,7 @@ InstructionQueue<Impl>::regStats()
      }
  */
      issueRate
-        .name(name() + ".ISSUE:rate")
+        .name(name() + ".rate")
          .desc("Inst issue rate")
          .flags(total)
          ;
@@ -292,32 +306,73 @@ InstructionQueue<Impl>::regStats()
  
      statFuBusy
          .init(Num_OpClasses)
-        .name(name() + ".ISSUE:fu_full")
+        .name(name() + ".fu_full")
          .desc("attempts to use FU when none available")
          .flags(pdf | dist)
          ;
      for (int i=0; i < Num_OpClasses; ++i) {
-        statFuBusy.subname(i, opClassStrings[i]);
+        statFuBusy.subname(i, Enums::OpClassStrings[i]);
      }
  
      fuBusy
          .init(numThreads)
-        .name(name() + ".ISSUE:fu_busy_cnt")
+        .name(name() + ".fu_busy_cnt")
          .desc("FU busy when requested")
          .flags(total)
          ;
  
      fuBusyRate
-        .name(name() + ".ISSUE:fu_busy_rate")
+        .name(name() + ".fu_busy_rate")
          .desc("FU busy rate (busy events/executed inst)")
          .flags(total)
          ;
      fuBusyRate = fuBusy / iqInstsIssued;
  
-    for ( int i=0; i < numThreads; i++) {
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
          // Tell mem dependence unit to reg stats as well.
-        memDepUnit[i].regStats();
+        memDepUnit[tid].regStats();
      }
+
+    intInstQueueReads
+        .name(name() + ".int_inst_queue_reads")
+        .desc("Number of integer instruction queue reads")
+        .flags(total);
+
+    intInstQueueWrites
+        .name(name() + ".int_inst_queue_writes")
+        .desc("Number of integer instruction queue writes")
+        .flags(total);
+
+    intInstQueueWakeupAccesses
+        .name(name() + ".int_inst_queue_wakeup_accesses")
+        .desc("Number of integer instruction queue wakeup accesses")
+        .flags(total);
+
+    fpInstQueueReads
+        .name(name() + ".fp_inst_queue_reads")
+        .desc("Number of floating instruction queue reads")
+        .flags(total);
+
+    fpInstQueueWrites
+        .name(name() + ".fp_inst_queue_writes")
+        .desc("Number of floating instruction queue writes")
+        .flags(total);
+
+    fpInstQueueWakeupQccesses
+        .name(name() + ".fp_inst_queue_wakeup_accesses")
+        .desc("Number of floating instruction queue wakeup accesses")
+        .flags(total);
+
+    intAluAccesses
+        .name(name() + ".int_alu_accesses")
+        .desc("Number of integer alu accesses")
+        .flags(total);
+
+    fpAluAccesses
+        .name(name() + ".fp_alu_accesses")
+        .desc("Number of floating point alu accesses")
+        .flags(total);
+
  }
  
  template <class Impl>
@@ -325,9 +380,9 @@ void
  InstructionQueue<Impl>::resetState()
  {
      //Initialize thread IQ counts
-    for (int i = 0; i <numThreads; i++) {
-        count[i] = 0;
-        instList[i].clear();
+    for (ThreadID tid = 0; tid <numThreads; tid++) {
+        count[tid] = 0;
+        instList[tid].clear();
      }
  
      // Initialize the number of free IQ entries.
@@ -342,8 +397,8 @@ InstructionQueue<Impl>::resetState()
          regScoreboard[i] = false;
      }
  
-    for (int i = 0; i < numThreads; ++i) {
-        squashedSeqNum[i] = 0;
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        squashedSeqNum[tid] = 0;
      }
  
      for (int i = 0; i < Num_OpClasses; ++i) {
@@ -354,11 +409,12 @@ InstructionQueue<Impl>::resetState()
      }
      nonSpecInsts.clear();
      listOrder.clear();
+    deferredMemInsts.clear();
  }
  
  template <class Impl>
  void
-InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+InstructionQueue<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
  {
      activeThreads = at_ptr;
  }
@@ -381,34 +437,24 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
  
  template <class Impl>
  void
-InstructionQueue<Impl>::switchOut()
+InstructionQueue<Impl>::drainSanityCheck() const
  {
-/*
-    if (!instList[0].empty() || (numEntries != freeEntries) ||
-        !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
-        dumpInsts();
-//        assert(0);
-    }
-*/
-    resetState();
-    dependGraph.reset();
-    instsToExecute.clear();
-    switchedOut = true;
-    for (int i = 0; i < numThreads; ++i) {
-        memDepUnit[i].switchOut();
-    }
+    assert(dependGraph.empty());
+    assert(instsToExecute.empty());
+    for (ThreadID tid = 0; tid < numThreads; ++tid)
+        memDepUnit[tid].drainSanityCheck();
  }
  
  template <class Impl>
  void
  InstructionQueue<Impl>::takeOverFrom()
  {
-    switchedOut = false;
+    resetState();
  }
  
  template <class Impl>
  int
-InstructionQueue<Impl>::entryAmount(int num_threads)
+InstructionQueue<Impl>::entryAmount(ThreadID num_threads)
  {
      if (iqPolicy == Partitioned) {
          return numEntries / num_threads;
@@ -425,11 +471,11 @@ InstructionQueue<Impl>::resetEntries()
      if (iqPolicy != Dynamic || numThreads > 1) {
          int active_threads = activeThreads->size();
  
-        std::list<unsigned>::iterator threads = activeThreads->begin();
-        std::list<unsigned>::iterator end = activeThreads->end();
+        list<ThreadID>::iterator threads = activeThreads->begin();
+        list<ThreadID>::iterator end = activeThreads->end();
  
          while (threads != end) {
-            unsigned tid = *threads++;
+            ThreadID tid = *threads++;
  
              if (iqPolicy == Partitioned) {
                  maxEntries[tid] = numEntries / active_threads;
@@ -449,7 +495,7 @@ InstructionQueue<Impl>::numFreeEntries()
  
  template <class Impl>
  unsigned
-InstructionQueue<Impl>::numFreeEntries(unsigned tid)
+InstructionQueue<Impl>::numFreeEntries(ThreadID tid)
  {
      return maxEntries[tid] - count[tid];
  }
@@ -469,7 +515,7 @@ InstructionQueue<Impl>::isFull()
  
  template <class Impl>
  bool
-InstructionQueue<Impl>::isFull(unsigned tid)
+InstructionQueue<Impl>::isFull(ThreadID tid)
  {
      if (numFreeEntries(tid) == 0) {
          return(true);
@@ -499,11 +545,12 @@ template <class Impl>
  void
  InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
  {
+    new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
      // Make sure the instruction is valid
      assert(new_inst);
  
-    DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n",
-            new_inst->seqNum, new_inst->readPC());
+    DPRINTF(IQ, "Adding instruction [sn:%lli] PC %s to the IQ.\n",
+            new_inst->seqNum, new_inst->pcState());
  
      assert(freeEntries != 0);
  
@@ -540,14 +587,15 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
  {
      // @todo: Clean up this code; can do it by setting inst as unable
      // to issue, then calling normal insert on the inst.
+    new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
  
      assert(new_inst);
  
      nonSpecInsts[new_inst->seqNum] = new_inst;
  
-    DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x "
+    DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %s "
              "to the IQ.\n",
-            new_inst->seqNum, new_inst->readPC());
+            new_inst->seqNum, new_inst->pcState());
  
      assert(freeEntries != 0);
  
@@ -590,6 +638,11 @@ InstructionQueue<Impl>::getInstToExecute()
      assert(!instsToExecute.empty());
      DynInstPtr inst = instsToExecute.front();
      instsToExecute.pop_front();
+    if (inst->isFloating()){
+        fpInstQueueReads++;
+    } else {
+        intInstQueueReads++;
+    }
      return inst;
  }
  
@@ -651,14 +704,9 @@ void
  InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
  {
      DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
+    assert(!cpu->switchedOut());
      // The CPU could have been sleeping until this op completed (*extremely*
      // long latency op).  Wake it if it was.  This may be overkill.
-    if (isSwitchedOut()) {
-        DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
-                inst->seqNum);
-        return;
-    }
-
      iewStage->wakeCPU();
  
      if (fu_idx > -1)
@@ -667,7 +715,7 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
      // @todo: Ensure that these FU Completions happen at the beginning
      // of a cycle, otherwise they could add too many instructions to
      // the queue.
-    issueToExecuteQueue->access(0)->size++;
+    issueToExecuteQueue->access(-1)->size++;
      instsToExecute.push_back(inst);
  }
  
@@ -683,6 +731,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
  
      IssueStruct *i2e_info = issueToExecuteQueue->access(0);
  
+    DynInstPtr deferred_mem_inst;
+    int total_deferred_mem_issued = 0;
+    while (total_deferred_mem_issued < totalWidth &&
+           (deferred_mem_inst = getDeferredMemInstToExecute()) != 0) {
+        issueToExecuteQueue->access(0)->size++;
+        instsToExecute.push_back(deferred_mem_inst);
+        total_deferred_mem_issued++;
+    }
+
      // Have iterator to head of the list
      // While I haven't exceeded bandwidth or reached the end of the list,
      // Try to get a FU that can do what this op needs.
@@ -695,7 +752,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
      ListOrderIt order_end_it = listOrder.end();
      int total_issued = 0;
  
-    while (total_issued < totalWidth &&
+    while (total_issued < (totalWidth - total_deferred_mem_issued) &&
             iewStage->canIssue() &&
             order_it != order_end_it) {
          OpClass op_class = (*order_it).queueType;
@@ -704,6 +761,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
  
          DynInstPtr issuing_inst = readyInsts[op_class].top();
  
+        issuing_inst->isFloating() ? fpInstQueueReads++ : intInstQueueReads++;
+
          assert(issuing_inst->seqNum == (*order_it).oldestInst);
  
          if (issuing_inst->isSquashed()) {
@@ -724,12 +783,12 @@ InstructionQueue<Impl>::scheduleReadyInsts()
          }
  
          int idx = -2;
-        int op_latency = 1;
-        int tid = issuing_inst->threadNumber;
+        Cycles op_latency = Cycles(1);
+        ThreadID tid = issuing_inst->threadNumber;
  
          if (op_class != No_OpClass) {
              idx = fuPool->getUnit(op_class);
-
+            issuing_inst->isFloating() ? fpAluAccesses++ : intAluAccesses++;
              if (idx > -1) {
                  op_latency = fuPool->getOpLatency(op_class);
              }
@@ -738,7 +797,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
          // If we have an instruction that doesn't require a FU, or a
          // valid FU, then schedule for execution.
          if (idx == -2 || idx != -1) {
-            if (op_latency == 1) {
+            if (op_latency == Cycles(1)) {
                  i2e_info->size++;
                  instsToExecute.push_back(issuing_inst);
  
@@ -747,15 +806,16 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                  if (idx >= 0)
                      fuPool->freeUnitNextCycle(idx);
              } else {
-                int issue_latency = fuPool->getIssueLatency(op_class);
+                Cycles issue_latency = fuPool->getIssueLatency(op_class);
                  // Generate completion event for the FU
                  FUCompletion *execution = new FUCompletion(issuing_inst,
                                                             idx, this);
  
-                execution->schedule(curTick + cpu->cycles(issue_latency - 1));
+                cpu->schedule(execution,
+                              cpu->clockEdge(Cycles(op_latency - 1)));
  
                  // @todo: Enforce that issue_latency == 1 or op_latency
-                if (issue_latency > 1) {
+                if (issue_latency > Cycles(1)) {
                      // If FU isn't pipelined, then it must be freed
                      // upon the execution completing.
                      execution->setFreeFU();
@@ -765,9 +825,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                  }
              }
  
-            DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
+            DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
                      "[sn:%lli]\n",
-                    tid, issuing_inst->readPC(),
+                    tid, issuing_inst->pcState(),
                      issuing_inst->seqNum);
  
              readyInsts[op_class].pop();
@@ -782,6 +842,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
              issuing_inst->setIssued();
              ++total_issued;
  
+#if TRACING_ON
+            issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
+#endif
+
              if (!issuing_inst->isMemRef()) {
                  // Memory instructions can not be freed from the IQ until they
                  // complete.
@@ -806,7 +870,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
      iqInstsIssued+= total_issued;
  
      // If we issued any instructions, tell the CPU we had activity.
-    if (total_issued) {
+    // @todo If the way deferred memory instructions are handled due to
+    // translation changes then the deferredMemInsts condition should be removed
+    // from the code below.
+    if (total_issued || total_deferred_mem_issued || deferredMemInsts.size()) {
          cpu->activityThisCycle();
      } else {
          DPRINTF(IQ, "Not able to schedule any instructions.\n");
@@ -824,7 +891,7 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
  
      assert(inst_it != nonSpecInsts.end());
  
-    unsigned tid = (*inst_it).second->threadNumber;
+    ThreadID tid = (*inst_it).second->threadNumber;
  
      (*inst_it).second->setAtCommit();
  
@@ -843,7 +910,7 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
  
  template <class Impl>
  void
-InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
+InstructionQueue<Impl>::commit(const InstSeqNum &inst, ThreadID tid)
  {
      DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
              tid,inst);
@@ -865,6 +932,13 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
  {
      int dependents = 0;
  
+    // The instruction queue here takes care of both floating and int ops
+    if (completed_inst->isFloating()) {
+        fpInstQueueWakeupQccesses++;
+    } else {
+        intInstQueueWakeupAccesses++;
+    }
+
      DPRINTF(IQ, "Waking dependents of completed instruction.\n");
  
      assert(!completed_inst->isSquashed());
@@ -894,6 +968,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
          // handled by the IQ and thus have no dependency graph entry.
          // @todo Figure out a cleaner way to handle this.
          if (dest_reg >= numPhysRegs) {
+            DPRINTF(IQ, "dest_reg :%d, numPhysRegs: %d\n", dest_reg,
+                    numPhysRegs);
              continue;
          }
  
@@ -905,8 +981,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
          DynInstPtr dep_inst = dependGraph.pop(dest_reg);
  
          while (dep_inst) {
-            DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
-                    dep_inst->readPC());
+            DPRINTF(IQ, "Waking up a dependent instruction, [sn:%lli] "
+                    "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
  
              // Might want to give more information to the instruction
              // so that it knows which of its source registers is
@@ -951,8 +1027,8 @@ InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
      }
  
      DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
-            "the ready list, PC %#x opclass:%i [sn:%lli].\n",
-            ready_inst->readPC(), op_class, ready_inst->seqNum);
+            "the ready list, PC %s opclass:%i [sn:%lli].\n",
+            ready_inst->pcState(), op_class, ready_inst->seqNum);
  }
  
  template <class Impl>
@@ -960,6 +1036,11 @@ void
  InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
  {
      DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+
+    // Reset DTB translation state
+    resched_inst->translationStarted(false);
+    resched_inst->translationCompleted(false);
+
      resched_inst->clearCanIssue();
      memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
  }
@@ -975,30 +1056,53 @@ template <class Impl>
  void
  InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
  {
-    int tid = completed_inst->threadNumber;
+    ThreadID tid = completed_inst->threadNumber;
  
-    DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
-            completed_inst->readPC(), completed_inst->seqNum);
+    DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%lli]\n",
+            completed_inst->pcState(), completed_inst->seqNum);
  
      ++freeEntries;
  
-    completed_inst->memOpDone = true;
+    completed_inst->memOpDone(true);
  
      memDepUnit[tid].completed(completed_inst);
      count[tid]--;
  }
  
+template <class Impl>
+void
+InstructionQueue<Impl>::deferMemInst(DynInstPtr &deferred_inst)
+{
+    deferredMemInsts.push_back(deferred_inst);
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+InstructionQueue<Impl>::getDeferredMemInstToExecute()
+{
+    for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
+         ++it) {
+        if ((*it)->translationCompleted() || (*it)->isSquashed()) {
+            DynInstPtr ret = *it;
+            deferredMemInsts.erase(it);
+            return ret;
+        }
+    }
+    return NULL;
+}
+
  template <class Impl>
  void
  InstructionQueue<Impl>::violation(DynInstPtr &store,
                                    DynInstPtr &faulting_load)
  {
+    intInstQueueWrites++;
      memDepUnit[store->threadNumber].violation(store, faulting_load);
  }
  
  template <class Impl>
  void
-InstructionQueue<Impl>::squash(unsigned tid)
+InstructionQueue<Impl>::squash(ThreadID tid)
  {
      DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
              "the IQ.\n", tid);
@@ -1018,7 +1122,7 @@ InstructionQueue<Impl>::squash(unsigned tid)
  
  template <class Impl>
  void
-InstructionQueue<Impl>::doSquash(unsigned tid)
+InstructionQueue<Impl>::doSquash(ThreadID tid)
  {
      // Start at the tail.
      ListIt squash_it = instList[tid].end();
@@ -1033,6 +1137,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
             (*squash_it)->seqNum > squashedSeqNum[tid]) {
  
          DynInstPtr squashed_inst = (*squash_it);
+        squashed_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
  
          // Only handle the instruction if it actually is in the IQ and
          // hasn't already been squashed in the IQ.
@@ -1044,11 +1149,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
  
          if (!squashed_inst->isIssued() ||
              (squashed_inst->isMemRef() &&
-             !squashed_inst->memOpDone)) {
+             !squashed_inst->memOpDone())) {
  
-            DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
-                    "squashed.\n",
-                    tid, squashed_inst->seqNum, squashed_inst->readPC());
+            DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n",
+                    tid, squashed_inst->seqNum, squashed_inst->pcState());
  
              // Remove the instruction from the dependency list.
              if (!squashed_inst->isNonSpeculative() &&
@@ -1084,7 +1188,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
                         !squashed_inst->isCompleted()) {
                  NonSpecMapIt ns_inst_it =
                      nonSpecInsts.find(squashed_inst->seqNum);
-                assert(ns_inst_it != nonSpecInsts.end());
+
                  if (ns_inst_it == nonSpecInsts.end()) {
                      assert(squashed_inst->getFault() != NoFault);
                  } else {
@@ -1143,9 +1247,9 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
              if (src_reg >= numPhysRegs) {
                  continue;
              } else if (regScoreboard[src_reg] == false) {
-                DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+                DPRINTF(IQ, "Instruction PC %s has src reg %i that "
                          "is being added to the dependency chain.\n",
-                        new_inst->readPC(), src_reg);
+                        new_inst->pcState(), src_reg);
  
                  dependGraph.insert(src_reg, new_inst);
  
@@ -1153,9 +1257,9 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
                  // was added to the dependency graph.
                  return_val = true;
              } else {
-                DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+                DPRINTF(IQ, "Instruction PC %s has src reg %i that "
                          "became ready before it reached the IQ.\n",
-                        new_inst->readPC(), src_reg);
+                        new_inst->pcState(), src_reg);
                  // Mark a register ready within the instruction.
                  new_inst->markSrcRegReady(src_reg_idx);
              }
@@ -1224,8 +1328,8 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
          OpClass op_class = inst->opClass();
  
          DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
-                "the ready list, PC %#x opclass:%i [sn:%lli].\n",
-                inst->readPC(), op_class, inst->seqNum);
+                "the ready list, PC %s opclass:%i [sn:%lli].\n",
+                inst->pcState(), op_class, inst->seqNum);
  
          readyInsts[op_class].push(inst);
  
@@ -1252,10 +1356,10 @@ InstructionQueue<Impl>::countInsts()
      // Change the #if if you want to use this method.
      int total_insts = 0;
  
-    for (int i = 0; i < numThreads; ++i) {
-        ListIt count_it = instList[i].begin();
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        ListIt count_it = instList[tid].begin();
  
-        while (count_it != instList[i].end()) {
+        while (count_it != instList[tid].end()) {
              if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
                  if (!(*count_it)->isIssued()) {
                      ++total_insts;
@@ -1295,7 +1399,7 @@ InstructionQueue<Impl>::dumpLists()
      cprintf("Non speculative list: ");
  
      while (non_spec_it != non_spec_end_it) {
-        cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
+        cprintf("%s [sn:%lli]", (*non_spec_it).second->pcState(),
                  (*non_spec_it).second->seqNum);
          ++non_spec_it;
      }
@@ -1324,21 +1428,19 @@ template <class Impl>
  void
  InstructionQueue<Impl>::dumpInsts()
  {
-    for (int i = 0; i < numThreads; ++i) {
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
          int num = 0;
          int valid_num = 0;
-        ListIt inst_list_it = instList[i].begin();
+        ListIt inst_list_it = instList[tid].begin();
  
-        while (inst_list_it != instList[i].end())
-        {
-            cprintf("Instruction:%i\n",
-                    num);
+        while (inst_list_it != instList[tid].end()) {
+            cprintf("Instruction:%i\n", num);
              if (!(*inst_list_it)->isSquashed()) {
                  if (!(*inst_list_it)->isIssued()) {
                      ++valid_num;
                      cprintf("Count:%i\n", valid_num);
                  } else if ((*inst_list_it)->isMemRef() &&
-                           !(*inst_list_it)->memOpDone) {
+                           !(*inst_list_it)->memOpDone()) {
                      // Loads that have not been marked as executed
                      // still count towards the total instructions.
                      ++valid_num;
@@ -1346,16 +1448,16 @@ InstructionQueue<Impl>::dumpInsts()
                  }
              }
  
-            cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+            cprintf("PC: %s\n[sn:%lli]\n[tid:%i]\n"
                      "Issued:%i\nSquashed:%i\n",
-                    (*inst_list_it)->readPC(),
+                    (*inst_list_it)->pcState(),
                      (*inst_list_it)->seqNum,
                      (*inst_list_it)->threadNumber,
                      (*inst_list_it)->isIssued(),
                      (*inst_list_it)->isSquashed());
  
              if ((*inst_list_it)->isMemRef()) {
-                cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+                cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
              }
  
              cprintf("\n");
@@ -1380,7 +1482,7 @@ InstructionQueue<Impl>::dumpInsts()
                  ++valid_num;
                  cprintf("Count:%i\n", valid_num);
              } else if ((*inst_list_it)->isMemRef() &&
-                       !(*inst_list_it)->memOpDone) {
+                       !(*inst_list_it)->memOpDone()) {
                  // Loads that have not been marked as executed
                  // still count towards the total instructions.
                  ++valid_num;
@@ -1388,16 +1490,16 @@ InstructionQueue<Impl>::dumpInsts()
              }
          }
  
-        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+        cprintf("PC: %s\n[sn:%lli]\n[tid:%i]\n"
                  "Issued:%i\nSquashed:%i\n",
-                (*inst_list_it)->readPC(),
+                (*inst_list_it)->pcState(),
                  (*inst_list_it)->seqNum,
                  (*inst_list_it)->threadNumber,
                  (*inst_list_it)->isIssued(),
                  (*inst_list_it)->isSquashed());
  
          if ((*inst_list_it)->isMemRef()) {
-            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
          }
  
          cprintf("\n");