cpu: add a condition-code register class
[gem5.git] / src / cpu / o3 / inst_queue_impl.hh
index bdf5f07aa7eeb1bbcc7f626409859fbea54219f9..1c86b7c89ee88993732d5aa3a85f50079204ed37 100644 (file)
@@ -1,4 +1,17 @@
 /*
+ * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
 #include <limits>
 #include <vector>
 
-#include "sim/core.hh"
-
 #include "cpu/o3/fu_pool.hh"
 #include "cpu/o3/inst_queue.hh"
+#include "debug/IQ.hh"
+#include "enums/OpClass.hh"
+#include "params/DerivO3CPU.hh"
+#include "sim/core.hh"
+
+// clang complains about std::set being overloaded with Packet::set if
+// we open up the entire namespace std
+using std::list;
 
 template <class Impl>
 InstructionQueue<Impl>::FUCompletion::FUCompletion(DynInstPtr &_inst,
-                                                   int fu_idx,
-                                                   InstructionQueue<Impl> *iq_ptr)
-    : Event(&mainEventQueue, Stat_Event_Pri),
+    int fu_idx, InstructionQueue<Impl> *iq_ptr)
+    : Event(Stat_Event_Pri, AutoDelete),
       inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
 {
-    this->setFlags(Event::AutoDelete);
 }
 
 template <class Impl>
@@ -58,31 +75,28 @@ InstructionQueue<Impl>::FUCompletion::process()
 
 template <class Impl>
 const char *
-InstructionQueue<Impl>::FUCompletion::description()
+InstructionQueue<Impl>::FUCompletion::description() const
 {
-    return "Functional unit completion event";
+    return "Functional unit completion";
 }
 
 template <class Impl>
 InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
-                                         Params *params)
+                                         DerivO3CPUParams *params)
     : cpu(cpu_ptr),
       iewStage(iew_ptr),
       fuPool(params->fuPool),
       numEntries(params->numIQEntries),
       totalWidth(params->issueWidth),
-      numPhysIntRegs(params->numPhysIntRegs),
-      numPhysFloatRegs(params->numPhysFloatRegs),
       commitToIEWDelay(params->commitToIEWDelay)
 {
     assert(fuPool);
 
-    switchedOut = false;
-
-    numThreads = params->numberOfThreads;
+    numThreads = params->numThreads;
 
-    // Set the number of physical registers as the number of int + float
-    numPhysRegs = numPhysIntRegs + numPhysFloatRegs;
+    // Set the number of total physical registers
+    numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
+        params->numPhysCCRegs;
 
     //Create an entry for each physical register within the
     //dependency graph.
@@ -92,9 +106,9 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
     regScoreboard.resize(numPhysRegs);
 
     //Initialize Mem Dependence Units
-    for (int i = 0; i < numThreads; i++) {
-        memDepUnit[i].init(params,i);
-        memDepUnit[i].setIQ(this);
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        memDepUnit[tid].init(params, tid);
+        memDepUnit[tid].setIQ(this);
     }
 
     resetState();
@@ -110,8 +124,8 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
         iqPolicy = Dynamic;
 
         //Set Max Entries to Total ROB Capacity
-        for (int i = 0; i < numThreads; i++) {
-            maxEntries[i] = numEntries;
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            maxEntries[tid] = numEntries;
         }
 
     } else if (policy == "partitioned") {
@@ -121,8 +135,8 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
         int part_amt = numEntries / numThreads;
 
         //Divide ROB up evenly
-        for (int i = 0; i < numThreads; i++) {
-            maxEntries[i] = part_amt;
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            maxEntries[tid] = part_amt;
         }
 
         DPRINTF(IQ, "IQ sharing policy set to Partitioned:"
@@ -135,8 +149,8 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
         int thresholdIQ = (int)((double)threshold * numEntries);
 
         //Divide up by threshold amount
-        for (int i = 0; i < numThreads; i++) {
-            maxEntries[i] = thresholdIQ;
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            maxEntries[tid] = thresholdIQ;
         }
 
         DPRINTF(IQ, "IQ sharing policy set to Threshold:"
@@ -243,14 +257,14 @@ InstructionQueue<Impl>::regStats()
 */
     numIssuedDist
         .init(0,totalWidth,1)
-        .name(name() + ".ISSUE:issued_per_cycle")
+        .name(name() + ".issued_per_cycle")
         .desc("Number of insts issued each cycle")
         .flags(pdf)
         ;
 /*
     dist_unissued
         .init(Num_OpClasses+2)
-        .name(name() + ".ISSUE:unissued_cause")
+        .name(name() + ".unissued_cause")
         .desc("Reason ready instruction not issued")
         .flags(pdf | dist)
         ;
@@ -259,12 +273,12 @@ InstructionQueue<Impl>::regStats()
     }
 */
     statIssuedInstType
-        .init(numThreads,Num_OpClasses)
-        .name(name() + ".ISSUE:FU_type")
+        .init(numThreads,Enums::Num_OpClass)
+        .name(name() + ".FU_type")
         .desc("Type of FU issued")
         .flags(total | pdf | dist)
         ;
-    statIssuedInstType.ysubnames(opClassStrings);
+    statIssuedInstType.ysubnames(Enums::OpClassStrings);
 
     //
     //  How long did instructions for a particular FU type wait prior to issue
@@ -272,7 +286,7 @@ InstructionQueue<Impl>::regStats()
 /*
     issueDelayDist
         .init(Num_OpClasses,0,99,2)
-        .name(name() + ".ISSUE:")
+        .name(name() + ".")
         .desc("cycles from operands ready to issue")
         .flags(pdf | cdf)
         ;
@@ -284,7 +298,7 @@ InstructionQueue<Impl>::regStats()
     }
 */
     issueRate
-        .name(name() + ".ISSUE:rate")
+        .name(name() + ".rate")
         .desc("Inst issue rate")
         .flags(total)
         ;
@@ -292,32 +306,73 @@ InstructionQueue<Impl>::regStats()
 
     statFuBusy
         .init(Num_OpClasses)
-        .name(name() + ".ISSUE:fu_full")
+        .name(name() + ".fu_full")
         .desc("attempts to use FU when none available")
         .flags(pdf | dist)
         ;
     for (int i=0; i < Num_OpClasses; ++i) {
-        statFuBusy.subname(i, opClassStrings[i]);
+        statFuBusy.subname(i, Enums::OpClassStrings[i]);
     }
 
     fuBusy
         .init(numThreads)
-        .name(name() + ".ISSUE:fu_busy_cnt")
+        .name(name() + ".fu_busy_cnt")
         .desc("FU busy when requested")
         .flags(total)
         ;
 
     fuBusyRate
-        .name(name() + ".ISSUE:fu_busy_rate")
+        .name(name() + ".fu_busy_rate")
         .desc("FU busy rate (busy events/executed inst)")
         .flags(total)
         ;
     fuBusyRate = fuBusy / iqInstsIssued;
 
-    for ( int i=0; i < numThreads; i++) {
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
         // Tell mem dependence unit to reg stats as well.
-        memDepUnit[i].regStats();
+        memDepUnit[tid].regStats();
     }
+
+    intInstQueueReads
+        .name(name() + ".int_inst_queue_reads")
+        .desc("Number of integer instruction queue reads")
+        .flags(total);
+
+    intInstQueueWrites
+        .name(name() + ".int_inst_queue_writes")
+        .desc("Number of integer instruction queue writes")
+        .flags(total);
+
+    intInstQueueWakeupAccesses
+        .name(name() + ".int_inst_queue_wakeup_accesses")
+        .desc("Number of integer instruction queue wakeup accesses")
+        .flags(total);
+
+    fpInstQueueReads
+        .name(name() + ".fp_inst_queue_reads")
+        .desc("Number of floating instruction queue reads")
+        .flags(total);
+
+    fpInstQueueWrites
+        .name(name() + ".fp_inst_queue_writes")
+        .desc("Number of floating instruction queue writes")
+        .flags(total);
+
+    fpInstQueueWakeupQccesses
+        .name(name() + ".fp_inst_queue_wakeup_accesses")
+        .desc("Number of floating instruction queue wakeup accesses")
+        .flags(total);
+
+    intAluAccesses
+        .name(name() + ".int_alu_accesses")
+        .desc("Number of integer alu accesses")
+        .flags(total);
+
+    fpAluAccesses
+        .name(name() + ".fp_alu_accesses")
+        .desc("Number of floating point alu accesses")
+        .flags(total);
+
 }
 
 template <class Impl>
@@ -325,9 +380,9 @@ void
 InstructionQueue<Impl>::resetState()
 {
     //Initialize thread IQ counts
-    for (int i = 0; i <numThreads; i++) {
-        count[i] = 0;
-        instList[i].clear();
+    for (ThreadID tid = 0; tid <numThreads; tid++) {
+        count[tid] = 0;
+        instList[tid].clear();
     }
 
     // Initialize the number of free IQ entries.
@@ -342,8 +397,8 @@ InstructionQueue<Impl>::resetState()
         regScoreboard[i] = false;
     }
 
-    for (int i = 0; i < numThreads; ++i) {
-        squashedSeqNum[i] = 0;
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        squashedSeqNum[tid] = 0;
     }
 
     for (int i = 0; i < Num_OpClasses; ++i) {
@@ -354,11 +409,12 @@ InstructionQueue<Impl>::resetState()
     }
     nonSpecInsts.clear();
     listOrder.clear();
+    deferredMemInsts.clear();
 }
 
 template <class Impl>
 void
-InstructionQueue<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+InstructionQueue<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
 {
     activeThreads = at_ptr;
 }
@@ -381,34 +437,24 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
 
 template <class Impl>
 void
-InstructionQueue<Impl>::switchOut()
+InstructionQueue<Impl>::drainSanityCheck() const
 {
-/*
-    if (!instList[0].empty() || (numEntries != freeEntries) ||
-        !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
-        dumpInsts();
-//        assert(0);
-    }
-*/
-    resetState();
-    dependGraph.reset();
-    instsToExecute.clear();
-    switchedOut = true;
-    for (int i = 0; i < numThreads; ++i) {
-        memDepUnit[i].switchOut();
-    }
+    assert(dependGraph.empty());
+    assert(instsToExecute.empty());
+    for (ThreadID tid = 0; tid < numThreads; ++tid)
+        memDepUnit[tid].drainSanityCheck();
 }
 
 template <class Impl>
 void
 InstructionQueue<Impl>::takeOverFrom()
 {
-    switchedOut = false;
+    resetState();
 }
 
 template <class Impl>
 int
-InstructionQueue<Impl>::entryAmount(int num_threads)
+InstructionQueue<Impl>::entryAmount(ThreadID num_threads)
 {
     if (iqPolicy == Partitioned) {
         return numEntries / num_threads;
@@ -425,11 +471,11 @@ InstructionQueue<Impl>::resetEntries()
     if (iqPolicy != Dynamic || numThreads > 1) {
         int active_threads = activeThreads->size();
 
-        std::list<unsigned>::iterator threads = activeThreads->begin();
-        std::list<unsigned>::iterator end = activeThreads->end();
+        list<ThreadID>::iterator threads = activeThreads->begin();
+        list<ThreadID>::iterator end = activeThreads->end();
 
         while (threads != end) {
-            unsigned tid = *threads++;
+            ThreadID tid = *threads++;
 
             if (iqPolicy == Partitioned) {
                 maxEntries[tid] = numEntries / active_threads;
@@ -449,7 +495,7 @@ InstructionQueue<Impl>::numFreeEntries()
 
 template <class Impl>
 unsigned
-InstructionQueue<Impl>::numFreeEntries(unsigned tid)
+InstructionQueue<Impl>::numFreeEntries(ThreadID tid)
 {
     return maxEntries[tid] - count[tid];
 }
@@ -469,7 +515,7 @@ InstructionQueue<Impl>::isFull()
 
 template <class Impl>
 bool
-InstructionQueue<Impl>::isFull(unsigned tid)
+InstructionQueue<Impl>::isFull(ThreadID tid)
 {
     if (numFreeEntries(tid) == 0) {
         return(true);
@@ -499,11 +545,12 @@ template <class Impl>
 void
 InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
 {
+    new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
     // Make sure the instruction is valid
     assert(new_inst);
 
-    DPRINTF(IQ, "Adding instruction [sn:%lli] PC %#x to the IQ.\n",
-            new_inst->seqNum, new_inst->readPC());
+    DPRINTF(IQ, "Adding instruction [sn:%lli] PC %s to the IQ.\n",
+            new_inst->seqNum, new_inst->pcState());
 
     assert(freeEntries != 0);
 
@@ -540,14 +587,15 @@ InstructionQueue<Impl>::insertNonSpec(DynInstPtr &new_inst)
 {
     // @todo: Clean up this code; can do it by setting inst as unable
     // to issue, then calling normal insert on the inst.
+    new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
 
     assert(new_inst);
 
     nonSpecInsts[new_inst->seqNum] = new_inst;
 
-    DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %#x "
+    DPRINTF(IQ, "Adding non-speculative instruction [sn:%lli] PC %s "
             "to the IQ.\n",
-            new_inst->seqNum, new_inst->readPC());
+            new_inst->seqNum, new_inst->pcState());
 
     assert(freeEntries != 0);
 
@@ -590,6 +638,11 @@ InstructionQueue<Impl>::getInstToExecute()
     assert(!instsToExecute.empty());
     DynInstPtr inst = instsToExecute.front();
     instsToExecute.pop_front();
+    if (inst->isFloating()){
+        fpInstQueueReads++;
+    } else {
+        intInstQueueReads++;
+    }
     return inst;
 }
 
@@ -651,14 +704,9 @@ void
 InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
 {
     DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
+    assert(!cpu->switchedOut());
     // The CPU could have been sleeping until this op completed (*extremely*
     // long latency op).  Wake it if it was.  This may be overkill.
-    if (isSwitchedOut()) {
-        DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
-                inst->seqNum);
-        return;
-    }
-
     iewStage->wakeCPU();
 
     if (fu_idx > -1)
@@ -667,7 +715,7 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
     // @todo: Ensure that these FU Completions happen at the beginning
     // of a cycle, otherwise they could add too many instructions to
     // the queue.
-    issueToExecuteQueue->access(0)->size++;
+    issueToExecuteQueue->access(-1)->size++;
     instsToExecute.push_back(inst);
 }
 
@@ -683,6 +731,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
 
     IssueStruct *i2e_info = issueToExecuteQueue->access(0);
 
+    DynInstPtr deferred_mem_inst;
+    int total_deferred_mem_issued = 0;
+    while (total_deferred_mem_issued < totalWidth &&
+           (deferred_mem_inst = getDeferredMemInstToExecute()) != 0) {
+        issueToExecuteQueue->access(0)->size++;
+        instsToExecute.push_back(deferred_mem_inst);
+        total_deferred_mem_issued++;
+    }
+
     // Have iterator to head of the list
     // While I haven't exceeded bandwidth or reached the end of the list,
     // Try to get a FU that can do what this op needs.
@@ -695,7 +752,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
     ListOrderIt order_end_it = listOrder.end();
     int total_issued = 0;
 
-    while (total_issued < totalWidth &&
+    while (total_issued < (totalWidth - total_deferred_mem_issued) &&
            iewStage->canIssue() &&
            order_it != order_end_it) {
         OpClass op_class = (*order_it).queueType;
@@ -704,6 +761,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
 
         DynInstPtr issuing_inst = readyInsts[op_class].top();
 
+        issuing_inst->isFloating() ? fpInstQueueReads++ : intInstQueueReads++;
+
         assert(issuing_inst->seqNum == (*order_it).oldestInst);
 
         if (issuing_inst->isSquashed()) {
@@ -724,12 +783,12 @@ InstructionQueue<Impl>::scheduleReadyInsts()
         }
 
         int idx = -2;
-        int op_latency = 1;
-        int tid = issuing_inst->threadNumber;
+        Cycles op_latency = Cycles(1);
+        ThreadID tid = issuing_inst->threadNumber;
 
         if (op_class != No_OpClass) {
             idx = fuPool->getUnit(op_class);
-
+            issuing_inst->isFloating() ? fpAluAccesses++ : intAluAccesses++;
             if (idx > -1) {
                 op_latency = fuPool->getOpLatency(op_class);
             }
@@ -738,7 +797,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
         // If we have an instruction that doesn't require a FU, or a
         // valid FU, then schedule for execution.
         if (idx == -2 || idx != -1) {
-            if (op_latency == 1) {
+            if (op_latency == Cycles(1)) {
                 i2e_info->size++;
                 instsToExecute.push_back(issuing_inst);
 
@@ -747,15 +806,16 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 if (idx >= 0)
                     fuPool->freeUnitNextCycle(idx);
             } else {
-                int issue_latency = fuPool->getIssueLatency(op_class);
+                Cycles issue_latency = fuPool->getIssueLatency(op_class);
                 // Generate completion event for the FU
                 FUCompletion *execution = new FUCompletion(issuing_inst,
                                                            idx, this);
 
-                execution->schedule(curTick + cpu->cycles(issue_latency - 1));
+                cpu->schedule(execution,
+                              cpu->clockEdge(Cycles(op_latency - 1)));
 
                 // @todo: Enforce that issue_latency == 1 or op_latency
-                if (issue_latency > 1) {
+                if (issue_latency > Cycles(1)) {
                     // If FU isn't pipelined, then it must be freed
                     // upon the execution completing.
                     execution->setFreeFU();
@@ -765,9 +825,9 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 }
             }
 
-            DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
+            DPRINTF(IQ, "Thread %i: Issuing instruction PC %s "
                     "[sn:%lli]\n",
-                    tid, issuing_inst->readPC(),
+                    tid, issuing_inst->pcState(),
                     issuing_inst->seqNum);
 
             readyInsts[op_class].pop();
@@ -782,6 +842,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
             issuing_inst->setIssued();
             ++total_issued;
 
+#if TRACING_ON
+            issuing_inst->issueTick = curTick() - issuing_inst->fetchTick;
+#endif
+
             if (!issuing_inst->isMemRef()) {
                 // Memory instructions can not be freed from the IQ until they
                 // complete.
@@ -806,7 +870,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
     iqInstsIssued+= total_issued;
 
     // If we issued any instructions, tell the CPU we had activity.
-    if (total_issued) {
+    // @todo If the way deferred memory instructions are handled due to
+    // translation changes then the deferredMemInsts condition should be removed
+    // from the code below.
+    if (total_issued || total_deferred_mem_issued || deferredMemInsts.size()) {
         cpu->activityThisCycle();
     } else {
         DPRINTF(IQ, "Not able to schedule any instructions.\n");
@@ -824,7 +891,7 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
 
     assert(inst_it != nonSpecInsts.end());
 
-    unsigned tid = (*inst_it).second->threadNumber;
+    ThreadID tid = (*inst_it).second->threadNumber;
 
     (*inst_it).second->setAtCommit();
 
@@ -843,7 +910,7 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
 
 template <class Impl>
 void
-InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
+InstructionQueue<Impl>::commit(const InstSeqNum &inst, ThreadID tid)
 {
     DPRINTF(IQ, "[tid:%i]: Committing instructions older than [sn:%i]\n",
             tid,inst);
@@ -865,6 +932,13 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
 {
     int dependents = 0;
 
+    // The instruction queue here takes care of both floating and int ops
+    if (completed_inst->isFloating()) {
+        fpInstQueueWakeupQccesses++;
+    } else {
+        intInstQueueWakeupAccesses++;
+    }
+
     DPRINTF(IQ, "Waking dependents of completed instruction.\n");
 
     assert(!completed_inst->isSquashed());
@@ -894,6 +968,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
         // handled by the IQ and thus have no dependency graph entry.
         // @todo Figure out a cleaner way to handle this.
         if (dest_reg >= numPhysRegs) {
+            DPRINTF(IQ, "dest_reg :%d, numPhysRegs: %d\n", dest_reg,
+                    numPhysRegs);
             continue;
         }
 
@@ -905,8 +981,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
         DynInstPtr dep_inst = dependGraph.pop(dest_reg);
 
         while (dep_inst) {
-            DPRINTF(IQ, "Waking up a dependent instruction, PC%#x.\n",
-                    dep_inst->readPC());
+            DPRINTF(IQ, "Waking up a dependent instruction, [sn:%lli] "
+                    "PC %s.\n", dep_inst->seqNum, dep_inst->pcState());
 
             // Might want to give more information to the instruction
             // so that it knows which of its source registers is
@@ -951,8 +1027,8 @@ InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
     }
 
     DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
-            "the ready list, PC %#x opclass:%i [sn:%lli].\n",
-            ready_inst->readPC(), op_class, ready_inst->seqNum);
+            "the ready list, PC %s opclass:%i [sn:%lli].\n",
+            ready_inst->pcState(), op_class, ready_inst->seqNum);
 }
 
 template <class Impl>
@@ -960,6 +1036,11 @@ void
 InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
 {
     DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+
+    // Reset DTB translation state
+    resched_inst->translationStarted(false);
+    resched_inst->translationCompleted(false);
+
     resched_inst->clearCanIssue();
     memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
 }
@@ -975,30 +1056,53 @@ template <class Impl>
 void
 InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
 {
-    int tid = completed_inst->threadNumber;
+    ThreadID tid = completed_inst->threadNumber;
 
-    DPRINTF(IQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
-            completed_inst->readPC(), completed_inst->seqNum);
+    DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%lli]\n",
+            completed_inst->pcState(), completed_inst->seqNum);
 
     ++freeEntries;
 
-    completed_inst->memOpDone = true;
+    completed_inst->memOpDone(true);
 
     memDepUnit[tid].completed(completed_inst);
     count[tid]--;
 }
 
+template <class Impl>
+void
+InstructionQueue<Impl>::deferMemInst(DynInstPtr &deferred_inst)
+{
+    deferredMemInsts.push_back(deferred_inst);
+}
+
+template <class Impl>
+typename Impl::DynInstPtr
+InstructionQueue<Impl>::getDeferredMemInstToExecute()
+{
+    for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
+         ++it) {
+        if ((*it)->translationCompleted() || (*it)->isSquashed()) {
+            DynInstPtr ret = *it;
+            deferredMemInsts.erase(it);
+            return ret;
+        }
+    }
+    return NULL;
+}
+
 template <class Impl>
 void
 InstructionQueue<Impl>::violation(DynInstPtr &store,
                                   DynInstPtr &faulting_load)
 {
+    intInstQueueWrites++;
     memDepUnit[store->threadNumber].violation(store, faulting_load);
 }
 
 template <class Impl>
 void
-InstructionQueue<Impl>::squash(unsigned tid)
+InstructionQueue<Impl>::squash(ThreadID tid)
 {
     DPRINTF(IQ, "[tid:%i]: Starting to squash instructions in "
             "the IQ.\n", tid);
@@ -1018,7 +1122,7 @@ InstructionQueue<Impl>::squash(unsigned tid)
 
 template <class Impl>
 void
-InstructionQueue<Impl>::doSquash(unsigned tid)
+InstructionQueue<Impl>::doSquash(ThreadID tid)
 {
     // Start at the tail.
     ListIt squash_it = instList[tid].end();
@@ -1033,6 +1137,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
            (*squash_it)->seqNum > squashedSeqNum[tid]) {
 
         DynInstPtr squashed_inst = (*squash_it);
+        squashed_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++;
 
         // Only handle the instruction if it actually is in the IQ and
         // hasn't already been squashed in the IQ.
@@ -1044,11 +1149,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
 
         if (!squashed_inst->isIssued() ||
             (squashed_inst->isMemRef() &&
-             !squashed_inst->memOpDone)) {
+             !squashed_inst->memOpDone())) {
 
-            DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
-                    "squashed.\n",
-                    tid, squashed_inst->seqNum, squashed_inst->readPC());
+            DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %s squashed.\n",
+                    tid, squashed_inst->seqNum, squashed_inst->pcState());
 
             // Remove the instruction from the dependency list.
             if (!squashed_inst->isNonSpeculative() &&
@@ -1084,7 +1188,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
                        !squashed_inst->isCompleted()) {
                 NonSpecMapIt ns_inst_it =
                     nonSpecInsts.find(squashed_inst->seqNum);
-                assert(ns_inst_it != nonSpecInsts.end());
+
                 if (ns_inst_it == nonSpecInsts.end()) {
                     assert(squashed_inst->getFault() != NoFault);
                 } else {
@@ -1143,9 +1247,9 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
             if (src_reg >= numPhysRegs) {
                 continue;
             } else if (regScoreboard[src_reg] == false) {
-                DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+                DPRINTF(IQ, "Instruction PC %s has src reg %i that "
                         "is being added to the dependency chain.\n",
-                        new_inst->readPC(), src_reg);
+                        new_inst->pcState(), src_reg);
 
                 dependGraph.insert(src_reg, new_inst);
 
@@ -1153,9 +1257,9 @@ InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
                 // was added to the dependency graph.
                 return_val = true;
             } else {
-                DPRINTF(IQ, "Instruction PC %#x has src reg %i that "
+                DPRINTF(IQ, "Instruction PC %s has src reg %i that "
                         "became ready before it reached the IQ.\n",
-                        new_inst->readPC(), src_reg);
+                        new_inst->pcState(), src_reg);
                 // Mark a register ready within the instruction.
                 new_inst->markSrcRegReady(src_reg_idx);
             }
@@ -1224,8 +1328,8 @@ InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
         OpClass op_class = inst->opClass();
 
         DPRINTF(IQ, "Instruction is ready to issue, putting it onto "
-                "the ready list, PC %#x opclass:%i [sn:%lli].\n",
-                inst->readPC(), op_class, inst->seqNum);
+                "the ready list, PC %s opclass:%i [sn:%lli].\n",
+                inst->pcState(), op_class, inst->seqNum);
 
         readyInsts[op_class].push(inst);
 
@@ -1252,10 +1356,10 @@ InstructionQueue<Impl>::countInsts()
     // Change the #if if you want to use this method.
     int total_insts = 0;
 
-    for (int i = 0; i < numThreads; ++i) {
-        ListIt count_it = instList[i].begin();
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        ListIt count_it = instList[tid].begin();
 
-        while (count_it != instList[i].end()) {
+        while (count_it != instList[tid].end()) {
             if (!(*count_it)->isSquashed() && !(*count_it)->isSquashedInIQ()) {
                 if (!(*count_it)->isIssued()) {
                     ++total_insts;
@@ -1295,7 +1399,7 @@ InstructionQueue<Impl>::dumpLists()
     cprintf("Non speculative list: ");
 
     while (non_spec_it != non_spec_end_it) {
-        cprintf("%#x [sn:%lli]", (*non_spec_it).second->readPC(),
+        cprintf("%s [sn:%lli]", (*non_spec_it).second->pcState(),
                 (*non_spec_it).second->seqNum);
         ++non_spec_it;
     }
@@ -1324,21 +1428,19 @@ template <class Impl>
 void
 InstructionQueue<Impl>::dumpInsts()
 {
-    for (int i = 0; i < numThreads; ++i) {
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
         int num = 0;
         int valid_num = 0;
-        ListIt inst_list_it = instList[i].begin();
+        ListIt inst_list_it = instList[tid].begin();
 
-        while (inst_list_it != instList[i].end())
-        {
-            cprintf("Instruction:%i\n",
-                    num);
+        while (inst_list_it != instList[tid].end()) {
+            cprintf("Instruction:%i\n", num);
             if (!(*inst_list_it)->isSquashed()) {
                 if (!(*inst_list_it)->isIssued()) {
                     ++valid_num;
                     cprintf("Count:%i\n", valid_num);
                 } else if ((*inst_list_it)->isMemRef() &&
-                           !(*inst_list_it)->memOpDone) {
+                           !(*inst_list_it)->memOpDone()) {
                     // Loads that have not been marked as executed
                     // still count towards the total instructions.
                     ++valid_num;
@@ -1346,16 +1448,16 @@ InstructionQueue<Impl>::dumpInsts()
                 }
             }
 
-            cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+            cprintf("PC: %s\n[sn:%lli]\n[tid:%i]\n"
                     "Issued:%i\nSquashed:%i\n",
-                    (*inst_list_it)->readPC(),
+                    (*inst_list_it)->pcState(),
                     (*inst_list_it)->seqNum,
                     (*inst_list_it)->threadNumber,
                     (*inst_list_it)->isIssued(),
                     (*inst_list_it)->isSquashed());
 
             if ((*inst_list_it)->isMemRef()) {
-                cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+                cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
             }
 
             cprintf("\n");
@@ -1380,7 +1482,7 @@ InstructionQueue<Impl>::dumpInsts()
                 ++valid_num;
                 cprintf("Count:%i\n", valid_num);
             } else if ((*inst_list_it)->isMemRef() &&
-                       !(*inst_list_it)->memOpDone) {
+                       !(*inst_list_it)->memOpDone()) {
                 // Loads that have not been marked as executed
                 // still count towards the total instructions.
                 ++valid_num;
@@ -1388,16 +1490,16 @@ InstructionQueue<Impl>::dumpInsts()
             }
         }
 
-        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+        cprintf("PC: %s\n[sn:%lli]\n[tid:%i]\n"
                 "Issued:%i\nSquashed:%i\n",
-                (*inst_list_it)->readPC(),
+                (*inst_list_it)->pcState(),
                 (*inst_list_it)->seqNum,
                 (*inst_list_it)->threadNumber,
                 (*inst_list_it)->isIssued(),
                 (*inst_list_it)->isSquashed());
 
         if ((*inst_list_it)->isMemRef()) {
-            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone());
         }
 
         cprintf("\n");