CPU: Get rid of the now unnecessary getInst/setInst family of functions.
[gem5.git] / src / cpu / ozone / lw_back_end_impl.hh
index dcd7a0d7edcd9a38b7cc7cf6b0abb8778e161539..465fccbdbd5d895b4355ebbe97a123abcb9a27a4 100644 (file)
@@ -28,8 +28,8 @@
  * Authors: Kevin Lim
  */
 
+#include "config/the_isa.hh"
 #include "config/use_checker.hh"
-
 #include "cpu/ozone/lw_back_end.hh"
 #include "cpu/op_class.hh"
 
@@ -45,7 +45,7 @@ LWBackEnd<Impl>::generateTrapEvent(Tick latency)
 
     TrapEvent *trap = new TrapEvent(this);
 
-    trap->schedule(curTick + cpu->cycles(latency));
+    trap->schedule(curTick + cpu->ticks(latency));
 
     thread->trapPending = true;
 }
@@ -119,9 +119,9 @@ LWBackEnd<Impl>::TrapEvent::process()
 
 template <class Impl>
 const char *
-LWBackEnd<Impl>::TrapEvent::description()
+LWBackEnd<Impl>::TrapEvent::description() const
 {
-    return "Trap event";
+    return "Trap";
 }
 
 template <class Impl>
@@ -141,13 +141,14 @@ LWBackEnd<Impl>::replayMemInst(DynInstPtr &inst)
 
 template <class Impl>
 LWBackEnd<Impl>::LWBackEnd(Params *params)
-    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
       trapSquash(false), tcSquash(false),
-      width(params->backEndWidth), exactFullStall(true)
+      latency(params->backEndLatency),
+      width(params->backEndWidth), lsqLimits(params->lsqLimits),
+      exactFullStall(true)
 {
     numROBEntries = params->numROBEntries;
     numInsts = 0;
-    numDispatchEntries = 32;
     maxOutstandingMemOps = params->maxOutstandingMemOps;
     numWaitingMemOps = 0;
     waitingInsts = 0;
@@ -169,6 +170,7 @@ LWBackEnd<Impl>::LWBackEnd(Params *params)
     LSQ.init(params, params->LQEntries, params->SQEntries, 0);
 
     dispatchStatus = Running;
+    commitStatus = Running;
 }
 
 template <class Impl>
@@ -183,79 +185,80 @@ void
 LWBackEnd<Impl>::regStats()
 {
     using namespace Stats;
-    rob_cap_events
-        .init(cpu->number_of_threads)
+    LSQ.regStats();
+
+    robCapEvents
+        .init(cpu->numThreads)
         .name(name() + ".ROB:cap_events")
         .desc("number of cycles where ROB cap was active")
         .flags(total)
         ;
 
-    rob_cap_inst_count
-        .init(cpu->number_of_threads)
+    robCapInstCount
+        .init(cpu->numThreads)
         .name(name() + ".ROB:cap_inst")
         .desc("number of instructions held up by ROB cap")
         .flags(total)
         ;
 
-    iq_cap_events
-        .init(cpu->number_of_threads)
+    iqCapEvents
+        .init(cpu->numThreads)
         .name(name() +".IQ:cap_events" )
         .desc("number of cycles where IQ cap was active")
         .flags(total)
         ;
 
-    iq_cap_inst_count
-        .init(cpu->number_of_threads)
+    iqCapInstCount
+        .init(cpu->numThreads)
         .name(name() + ".IQ:cap_inst")
         .desc("number of instructions held up by IQ cap")
         .flags(total)
         ;
 
-
-    exe_inst
-        .init(cpu->number_of_threads)
+    exeInst
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:count")
         .desc("number of insts issued")
         .flags(total)
         ;
 
-    exe_swp
-        .init(cpu->number_of_threads)
+    exeSwp
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:swp")
         .desc("number of swp insts issued")
         .flags(total)
         ;
 
-    exe_nop
-        .init(cpu->number_of_threads)
+    exeNop
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:nop")
         .desc("number of nop insts issued")
         .flags(total)
         ;
 
-    exe_refs
-        .init(cpu->number_of_threads)
+    exeRefs
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:refs")
         .desc("number of memory reference insts issued")
         .flags(total)
         ;
 
-    exe_loads
-        .init(cpu->number_of_threads)
+    exeLoads
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:loads")
         .desc("number of load insts issued")
         .flags(total)
         ;
 
-    exe_branches
-        .init(cpu->number_of_threads)
+    exeBranches
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:branches")
         .desc("Number of branches issued")
         .flags(total)
         ;
 
-    issued_ops
-        .init(cpu->number_of_threads)
+    issuedOps
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:op_count")
         .desc("number of insts issued")
         .flags(total)
@@ -271,29 +274,29 @@ LWBackEnd<Impl>::regStats()
     //
     //  Other stats
     //
-    lsq_forw_loads
-        .init(cpu->number_of_threads)
+    lsqForwLoads
+        .init(cpu->numThreads)
         .name(name() + ".LSQ:forw_loads")
         .desc("number of loads forwarded via LSQ")
         .flags(total)
         ;
 
-    inv_addr_loads
-        .init(cpu->number_of_threads)
+    invAddrLoads
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:addr_loads")
         .desc("number of invalid-address loads")
         .flags(total)
         ;
 
-    inv_addr_swpfs
-        .init(cpu->number_of_threads)
+    invAddrSwpfs
+        .init(cpu->numThreads)
         .name(name() + ".ISSUE:addr_swpfs")
         .desc("number of invalid-address SW prefetches")
         .flags(total)
         ;
 
-    lsq_blocked_loads
-        .init(cpu->number_of_threads)
+    lsqBlockedLoads
+        .init(cpu->numThreads)
         .name(name() + ".LSQ:blocked_loads")
         .desc("number of ready loads not issued due to memory disambiguation")
         .flags(total)
@@ -304,123 +307,124 @@ LWBackEnd<Impl>::regStats()
         .desc("Number of times LSQ instruction issued early")
         ;
 
-    n_issued_dist
+    nIssuedDist
         .init(issueWidth + 1)
         .name(name() + ".ISSUE:issued_per_cycle")
         .desc("Number of insts issued each cycle")
         .flags(total | pdf | dist)
         ;
-    issue_delay_dist
+/*
+    issueDelayDist
         .init(Num_OpClasses,0,99,2)
         .name(name() + ".ISSUE:")
         .desc("cycles from operands ready to issue")
         .flags(pdf | cdf)
         ;
 
-    queue_res_dist
+    queueResDist
         .init(Num_OpClasses, 0, 99, 2)
         .name(name() + ".IQ:residence:")
         .desc("cycles from dispatch to issue")
         .flags(total | pdf | cdf )
         ;
     for (int i = 0; i < Num_OpClasses; ++i) {
-        queue_res_dist.subname(i, opClassStrings[i]);
+        queueResDist.subname(i, opClassStrings[i]);
     }
-
-    writeback_count
-        .init(cpu->number_of_threads)
+*/
+    writebackCount
+        .init(cpu->numThreads)
         .name(name() + ".WB:count")
         .desc("cumulative count of insts written-back")
         .flags(total)
         ;
 
-    producer_inst
-        .init(cpu->number_of_threads)
+    producerInst
+        .init(cpu->numThreads)
         .name(name() + ".WB:producers")
         .desc("num instructions producing a value")
         .flags(total)
         ;
 
-    consumer_inst
-        .init(cpu->number_of_threads)
+    consumerInst
+        .init(cpu->numThreads)
         .name(name() + ".WB:consumers")
         .desc("num instructions consuming a value")
         .flags(total)
         ;
 
-    wb_penalized
-        .init(cpu->number_of_threads)
+    wbPenalized
+        .init(cpu->numThreads)
         .name(name() + ".WB:penalized")
         .desc("number of instrctions required to write to 'other' IQ")
         .flags(total)
         ;
 
 
-    wb_penalized_rate
+    wbPenalizedRate
         .name(name() + ".WB:penalized_rate")
         .desc ("fraction of instructions written-back that wrote to 'other' IQ")
         .flags(total)
         ;
 
-    wb_penalized_rate = wb_penalized / writeback_count;
+    wbPenalizedRate = wbPenalized / writebackCount;
 
-    wb_fanout
+    wbFanout
         .name(name() + ".WB:fanout")
         .desc("average fanout of values written-back")
         .flags(total)
         ;
 
-    wb_fanout = producer_inst / consumer_inst;
+    wbFanout = producerInst / consumerInst;
 
-    wb_rate
+    wbRate
         .name(name() + ".WB:rate")
         .desc("insts written-back per cycle")
         .flags(total)
         ;
-    wb_rate = writeback_count / cpu->numCycles;
+    wbRate = writebackCount / cpu->numCycles;
 
-    stat_com_inst
-        .init(cpu->number_of_threads)
+    statComInst
+        .init(cpu->numThreads)
         .name(name() + ".COM:count")
         .desc("Number of instructions committed")
         .flags(total)
         ;
 
-    stat_com_swp
-        .init(cpu->number_of_threads)
+    statComSwp
+        .init(cpu->numThreads)
         .name(name() + ".COM:swp_count")
         .desc("Number of s/w prefetches committed")
         .flags(total)
         ;
 
-    stat_com_refs
-        .init(cpu->number_of_threads)
+    statComRefs
+        .init(cpu->numThreads)
         .name(name() +  ".COM:refs")
         .desc("Number of memory references committed")
         .flags(total)
         ;
 
-    stat_com_loads
-        .init(cpu->number_of_threads)
+    statComLoads
+        .init(cpu->numThreads)
         .name(name() +  ".COM:loads")
         .desc("Number of loads committed")
         .flags(total)
         ;
 
-    stat_com_membars
-        .init(cpu->number_of_threads)
+    statComMembars
+        .init(cpu->numThreads)
         .name(name() +  ".COM:membars")
         .desc("Number of memory barriers committed")
         .flags(total)
         ;
 
-    stat_com_branches
-        .init(cpu->number_of_threads)
+    statComBranches
+        .init(cpu->numThreads)
         .name(name() + ".COM:branches")
         .desc("Number of branches committed")
         .flags(total)
         ;
-    n_committed_dist
+    nCommittedDist
         .init(0,commitWidth,1)
         .name(name() + ".COM:committed_per_cycle")
         .desc("Number of insts commited each cycle")
@@ -440,61 +444,62 @@ LWBackEnd<Impl>::regStats()
     //  -> The standard deviation is computed only over cycles where
     //  we reached the BW limit
     //
-    commit_eligible
-        .init(cpu->number_of_threads)
+    commitEligible
+        .init(cpu->numThreads)
         .name(name() + ".COM:bw_limited")
         .desc("number of insts not committed due to BW limits")
         .flags(total)
         ;
 
-    commit_eligible_samples
+    commitEligibleSamples
         .name(name() + ".COM:bw_lim_events")
         .desc("number cycles where commit BW limit reached")
         ;
 
     squashedInsts
-        .init(cpu->number_of_threads)
+        .init(cpu->numThreads)
         .name(name() + ".COM:squashed_insts")
         .desc("Number of instructions removed from inst list")
         ;
 
     ROBSquashedInsts
-        .init(cpu->number_of_threads)
+        .init(cpu->numThreads)
         .name(name() + ".COM:rob_squashed_insts")
         .desc("Number of instructions removed from inst list when they reached the head of the ROB")
         ;
 
-    ROB_fcount
+    ROBFcount
         .name(name() + ".ROB:full_count")
         .desc("number of cycles where ROB was full")
         ;
 
-    ROB_count
-        .init(cpu->number_of_threads)
+    ROBCount
+        .init(cpu->numThreads)
         .name(name() + ".ROB:occupancy")
         .desc(name() + ".ROB occupancy (cumulative)")
         .flags(total)
         ;
 
-    ROB_full_rate
+    ROBFullRate
         .name(name() + ".ROB:full_rate")
         .desc("ROB full per cycle")
         ;
-    ROB_full_rate = ROB_fcount / cpu->numCycles;
+    ROBFullRate = ROBFcount / cpu->numCycles;
 
-    ROB_occ_rate
+    ROBOccRate
         .name(name() + ".ROB:occ_rate")
         .desc("ROB occupancy rate")
         .flags(total)
         ;
-    ROB_occ_rate = ROB_count / cpu->numCycles;
-
-    ROB_occ_dist
-        .init(cpu->number_of_threads,0,numROBEntries,2)
+    ROBOccRate = ROBCount / cpu->numCycles;
+/*
+    ROBOccDist
+        .init(cpu->numThreads, 0, numROBEntries, 2)
         .name(name() + ".ROB:occ_dist")
         .desc("ROB Occupancy per cycle")
         .flags(total | cdf)
         ;
+*/
 }
 
 template <class Impl>
@@ -520,11 +525,7 @@ template <class Impl>
 void
 LWBackEnd<Impl>::checkInterrupts()
 {
-    if (cpu->checkInterrupts &&
-        cpu->check_interrupts() &&
-        !cpu->inPalMode(thread->readPC()) &&
-        !trapSquash &&
-        !tcSquash) {
+    if (cpu->checkInterrupts(tc) && !trapSquash && !tcSquash) {
         frontEnd->interruptPending = true;
         if (robEmpty() && !LSQ.hasStoresToWB()) {
             // Will need to squash all instructions currently in flight and have
@@ -556,6 +557,7 @@ LWBackEnd<Impl>::checkInterrupts()
         }
     }
 }
+#endif
 
 template <class Impl>
 void
@@ -579,7 +581,6 @@ LWBackEnd<Impl>::handleFault(Fault &fault, Tick latency)
     // Generate trap squash event.
     generateTrapEvent(latency);
 }
-#endif
 
 template <class Impl>
 void
@@ -587,20 +588,25 @@ LWBackEnd<Impl>::tick()
 {
     DPRINTF(BE, "Ticking back end\n");
 
+    // Read in any done instruction information and update the IQ or LSQ.
+    updateStructures();
+
     if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
         cpu->signalSwitched();
         return;
     }
 
-    ROB_count[0]+= numInsts;
+    readyInstsForCommit();
 
-    wbCycle = 0;
+    numInstsToWB.advance();
 
-    // Read in any done instruction information and update the IQ or LSQ.
-    updateStructures();
+    ROBCount[0]+= numInsts;
+
+    wbCycle = 0;
 
 #if FULL_SYSTEM
     checkInterrupts();
+#endif
 
     if (trapSquash) {
         assert(!tcSquash);
@@ -608,7 +614,6 @@ LWBackEnd<Impl>::tick()
     } else if (tcSquash) {
         squashFromTC();
     }
-#endif
 
     if (dispatchStatus != Blocked) {
         dispatchInsts();
@@ -673,6 +678,10 @@ LWBackEnd<Impl>::dispatchInsts()
     while (numInsts < numROBEntries &&
            numWaitingMemOps < maxOutstandingMemOps) {
         // Get instruction from front of time buffer
+        if (lsqLimits && LSQ.isFull()) {
+            break;
+        }
+
         DynInstPtr inst = frontEnd->getInst();
         if (!inst) {
             break;
@@ -731,6 +740,7 @@ LWBackEnd<Impl>::dispatchInsts()
                 inst->setIssued();
                 inst->setExecuted();
                 inst->setCanCommit();
+                numInstsToWB[0]++;
             } else {
                 DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
                         "exeList.\n",
@@ -865,8 +875,17 @@ LWBackEnd<Impl>::executeInsts()
             if (inst->isLoad()) {
                 LSQ.executeLoad(inst);
             } else if (inst->isStore()) {
-                LSQ.executeStore(inst);
-                if (inst->req && !(inst->req->getFlags() & LOCKED)) {
+                Fault fault = LSQ.executeStore(inst);
+
+                if (!inst->isStoreConditional() && fault == NoFault) {
+                    inst->setExecuted();
+
+                    instToCommit(inst);
+                } else if (fault != NoFault) {
+                    // If the instruction faulted, then we need to send it along to commit
+                    // without the instruction completing.
+                    // Send this instruction to commit, also make sure iew stage
+                    // realizes there is activity.
                     inst->setExecuted();
 
                     instToCommit(inst);
@@ -907,36 +926,54 @@ LWBackEnd<Impl>::executeInsts()
         }
     }
 
-    issued_ops[0]+= num_executed;
-    n_issued_dist[num_executed]++;
+    issuedOps[0]+= num_executed;
+    nIssuedDist[num_executed]++;
 }
 
 template<class Impl>
 void
 LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
 {
-
     DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
             inst->seqNum, inst->readPC());
 
     if (!inst->isSquashed()) {
-        DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
-                inst->seqNum, inst->readPC());
-
-        inst->setCanCommit();
-
         if (inst->isExecuted()) {
             inst->setResultReady();
             int dependents = wakeDependents(inst);
             if (dependents) {
-                producer_inst[0]++;
-                consumer_inst[0]+= dependents;
+                producerInst[0]++;
+                consumerInst[0]+= dependents;
             }
         }
     }
 
-    writeback_count[0]++;
+    writeback.push_back(inst);
+
+    numInstsToWB[0]++;
+
+    writebackCount[0]++;
+}
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+    for (int i = numInstsToWB[-latency];
+         !writeback.empty() && i;
+         --i)
+    {
+        DynInstPtr inst = writeback.front();
+        writeback.pop_front();
+        if (!inst->isSquashed()) {
+            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            inst->setCanCommit();
+        }
+    }
 }
+
 #if 0
 template <class Impl>
 void
@@ -1009,7 +1046,7 @@ LWBackEnd<Impl>::commitInst(int inst_num)
     // or store inst.  Signal backwards that it should be executed.
     if (!inst->isExecuted()) {
         if (inst->isNonSpeculative() ||
-            inst->isStoreConditional() ||
+            (inst->isStoreConditional() && inst->getFault() == NoFault) ||
             inst->isMemBarrier() ||
             inst->isWriteBarrier()) {
 #if !FULL_SYSTEM
@@ -1134,15 +1171,8 @@ LWBackEnd<Impl>::commitInst(int inst_num)
         }
 #endif
 
-        thread->setInst(
-            static_cast<TheISA::MachInst>(inst->staticInst->machInst));
-#if FULL_SYSTEM
         handleFault(inst_fault);
         return false;
-#else // !FULL_SYSTEM
-        panic("fault (%d) detected @ PC %08p", inst_fault,
-              inst->PC);
-#endif // FULL_SYSTEM
     }
 
     int freed_regs = 0;
@@ -1154,6 +1184,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
         ++freed_regs;
     }
 
+#if FULL_SYSTEM
+    if (thread->profile) {
+//        bool usermode =
+//            (xc->readMiscRegNoEffect(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+//        thread->profilePC = usermode ? 1 : inst->readPC();
+        thread->profilePC = inst->readPC();
+        ProfileNode *node = thread->profile->consume(thread->getTC(),
+                                                     inst->staticInst);
+
+        if (node)
+            thread->profileNode = node;
+    }
+#endif
+
     if (inst->traceData) {
         inst->traceData->setFetchSeq(inst->seqNum);
         inst->traceData->setCPSeq(thread->numInst);
@@ -1161,6 +1205,9 @@ LWBackEnd<Impl>::commitInst(int inst_num)
         inst->traceData = NULL;
     }
 
+    if (inst->isCopy())
+        panic("Should not commit any copy instructions!");
+
     inst->clearDependents();
 
     frontEnd->addFreeRegs(freed_regs);
@@ -1210,9 +1257,9 @@ LWBackEnd<Impl>::commitInsts()
     while (!instList.empty() && inst_num < commitWidth) {
         if (instList.back()->isSquashed()) {
             instList.back()->clearDependents();
+            ROBSquashedInsts[instList.back()->threadNumber]++;
             instList.pop_back();
             --numInsts;
-            ROBSquashedInsts[instList.back()->threadNumber]++;
             continue;
         }
 
@@ -1224,7 +1271,7 @@ LWBackEnd<Impl>::commitInsts()
             break;
         }
     }
-    n_committed_dist.sample(inst_num);
+    nCommittedDist.sample(inst_num);
 }
 
 template <class Impl>
@@ -1234,10 +1281,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
     LSQ.squash(sn);
 
     int freed_regs = 0;
-    InstListIt waiting_list_end = waitingList.end();
+    InstListIt insts_end_it = waitingList.end();
     InstListIt insts_it = waitingList.begin();
 
-    while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+    while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
     {
         if ((*insts_it)->isSquashed()) {
             ++insts_it;
@@ -1263,6 +1310,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
     while (!instList.empty() && (*insts_it)->seqNum > sn)
     {
         if ((*insts_it)->isSquashed()) {
+            panic("Instruction should not be already squashed and on list!");
             ++insts_it;
             continue;
         }
@@ -1294,18 +1342,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
         --numInsts;
     }
 
-    insts_it = waitingList.begin();
-    while (!waitingList.empty() && insts_it != waitingList.end()) {
-        if ((*insts_it)->seqNum < sn) {
-            ++insts_it;
-            continue;
-        }
-        assert((*insts_it)->isSquashed());
-
-        waitingList.erase(insts_it++);
-        waitingInsts--;
-    }
-
     while (memBarrier && memBarrier->seqNum > sn) {
         DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
                 "squashed)\n", memBarrier->seqNum);
@@ -1323,6 +1359,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
         }
     }
 
+    insts_it = replayList.begin();
+    insts_end_it = replayList.end();
+    while (!replayList.empty() && insts_it != insts_end_it) {
+        if ((*insts_it)->seqNum < sn) {
+            ++insts_it;
+            continue;
+        }
+        assert((*insts_it)->isSquashed());
+
+        replayList.erase(insts_it++);
+    }
+
     frontEnd->addFreeRegs(freed_regs);
 }
 
@@ -1393,14 +1441,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
     frontEnd->squash(inst->seqNum - 1, inst->readPC());
 }
 
-template <class Impl>
-void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
-    faultFromFetch = fault;
-    fetchHasFault = true;
-}
-
 template <class Impl>
 void
 LWBackEnd<Impl>::switchOut()
@@ -1419,17 +1459,25 @@ LWBackEnd<Impl>::doSwitchOut()
     // yet written back.
     assert(robEmpty());
     assert(!LSQ.hasStoresToWB());
-
+    writeback.clear();
+    for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+        numInstsToWB.advance();
+
+//    squash(0);
+    assert(waitingList.empty());
+    assert(instList.empty());
+    assert(replayList.empty());
+    assert(writeback.empty());
     LSQ.switchOut();
-
-    squash(0);
 }
 
 template <class Impl>
 void
 LWBackEnd<Impl>::takeOverFrom(ThreadContext *old_tc)
 {
-    switchedOut = false;
+    assert(!squashPending);
+    squashSeqNum = 0;
+    squashNextPC = 0;
     tcSquash = false;
     trapSquash = false;
 
@@ -1447,34 +1495,34 @@ template <class Impl>
 void
 LWBackEnd<Impl>::updateExeInstStats(DynInstPtr &inst)
 {
-    int thread_number = inst->threadNumber;
+    ThreadID tid = inst->threadNumber;
 
     //
     //  Pick off the software prefetches
     //
 #ifdef TARGET_ALPHA
     if (inst->isDataPrefetch())
-        exe_swp[thread_number]++;
+        exeSwp[tid]++;
     else
-        exe_inst[thread_number]++;
+        exeInst[tid]++;
 #else
-    exe_inst[thread_number]++;
+    exeInst[tid]++;
 #endif
 
     //
     //  Control operations
     //
     if (inst->isControl())
-        exe_branches[thread_number]++;
+        exeBranches[tid]++;
 
     //
     //  Memory operations
     //
     if (inst->isMemRef()) {
-        exe_refs[thread_number]++;
+        exeRefs[tid]++;
 
         if (inst->isLoad())
-            exe_loads[thread_number]++;
+            exeLoads[tid]++;
     }
 }
 
@@ -1482,7 +1530,7 @@ template <class Impl>
 void
 LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
 {
-    unsigned tid = inst->threadNumber;
+    ThreadID tid = inst->threadNumber;
 
     // keep an instruction count
     thread->numInst++;
@@ -1494,33 +1542,33 @@ LWBackEnd<Impl>::updateComInstStats(DynInstPtr &inst)
     //
 #ifdef TARGET_ALPHA
     if (inst->isDataPrefetch()) {
-        stat_com_swp[tid]++;
+        statComSwp[tid]++;
     } else {
-        stat_com_inst[tid]++;
+        statComInst[tid]++;
     }
 #else
-    stat_com_inst[tid]++;
+    statComInst[tid]++;
 #endif
 
     //
     //  Control Instructions
     //
     if (inst->isControl())
-        stat_com_branches[tid]++;
+        statComBranches[tid]++;
 
     //
     //  Memory references
     //
     if (inst->isMemRef()) {
-        stat_com_refs[tid]++;
+        statComRefs[tid]++;
 
         if (inst->isLoad()) {
-            stat_com_loads[tid]++;
+            statComLoads[tid]++;
         }
     }
 
     if (inst->isMemBarrier()) {
-        stat_com_membars[tid]++;
+        statComMembars[tid]++;
     }
 }
 
@@ -1572,6 +1620,45 @@ LWBackEnd<Impl>::dumpInsts()
         ++num;
     }
 
+    inst_list_it = --(writeback.end());
+
+    cprintf("Writeback list size: %i\n", writeback.size());
+
+    while (inst_list_it != writeback.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Loads that have not been marked as executed still count
+                // towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it--;
+        ++num;
+    }
+
     cprintf("Waiting list size: %i\n", waitingList.size());
 
     inst_list_it = --(waitingList.end());