New stats added to O3 model.
author Kevin Lim <ktlim@umich.edu>
Mon, 24 Apr 2006 21:06:00 +0000 (17:06 -0400)
committer Kevin Lim <ktlim@umich.edu>
Mon, 24 Apr 2006 21:06:00 +0000 (17:06 -0400)
--HG--
extra : convert_revision : 7abb491e89e3e1a331cd19aa05ddce5184abf9e0

cpu/o3/commit.hh
cpu/o3/commit_impl.hh
cpu/o3/fetch.hh
cpu/o3/fetch_impl.hh
cpu/o3/iew.hh
cpu/o3/iew_impl.hh
cpu/o3/inst_queue.hh
cpu/o3/inst_queue_impl.hh
cpu/o3/rename.hh
cpu/o3/rename_impl.hh

index 93b74ebb00af18f17bb965198bed994ab65e69d5..f374b8fb7c3e020f831e61fe9ded63f9b9eafa7e 100644 (file)
@@ -369,6 +369,8 @@ class DefaultCommit
     /** Rename map interface. */
     RenameMap *renameMap[Impl::MaxThreads];
 
+    void updateComInstStats(DynInstPtr &inst);
+
     /** Stat for the total number of committed instructions. */
     Stats::Scalar<> commitCommittedInsts;
     /** Stat for the total number of squashed instructions discarded by commit.
@@ -383,15 +385,26 @@ class DefaultCommit
      */
     Stats::Scalar<> commitNonSpecStalls;
     /** Stat for the total number of committed branches. */
-    Stats::Scalar<> commitCommittedBranches;
+//    Stats::Scalar<> commitCommittedBranches;
     /** Stat for the total number of committed loads. */
-    Stats::Scalar<> commitCommittedLoads;
+//    Stats::Scalar<> commitCommittedLoads;
     /** Stat for the total number of committed memory references. */
-    Stats::Scalar<> commitCommittedMemRefs;
+//    Stats::Scalar<> commitCommittedMemRefs;
     /** Stat for the total number of branch mispredicts that caused a squash. */
     Stats::Scalar<> branchMispredicts;
     /** Distribution of the number of committed instructions each cycle. */
     Stats::Distribution<> numCommittedDist;
+
+    // total number of instructions committed
+    Stats::Vector<> stat_com_inst;
+    Stats::Vector<> stat_com_swp;
+    Stats::Vector<> stat_com_refs;
+    Stats::Vector<> stat_com_loads;
+    Stats::Vector<> stat_com_membars;
+    Stats::Vector<> stat_com_branches;
+
+    Stats::Scalar<> commit_eligible_samples;
+    Stats::Vector<> commit_eligible;
 };
 
 #endif // __CPU_O3_COMMIT_HH__
index ef1ba9282ea89b0e7b4ca5855037011dee091712..157e688c73f3ea9ba39a21e7f0bd506a50a08771 100644 (file)
@@ -133,6 +133,7 @@ template <class Impl>
 void
 DefaultCommit<Impl>::regStats()
 {
+    using namespace Stats;
     commitCommittedInsts
         .name(name() + ".commitCommittedInsts")
         .desc("The number of committed instructions")
@@ -150,6 +151,7 @@ DefaultCommit<Impl>::regStats()
         .desc("The number of times commit has been forced to stall to "
               "communicate backwards")
         .prereq(commitNonSpecStalls);
+/*
     commitCommittedBranches
         .name(name() + ".commitCommittedBranches")
         .desc("The number of committed branches")
@@ -162,6 +164,7 @@ DefaultCommit<Impl>::regStats()
         .name(name() + ".commitCommittedMemRefs")
         .desc("The number of committed memory references")
         .prereq(commitCommittedMemRefs);
+*/
     branchMispredicts
         .name(name() + ".branchMispredicts")
         .desc("The number of times a branch was mispredicted")
@@ -172,6 +175,73 @@ DefaultCommit<Impl>::regStats()
         .desc("Number of insts commited each cycle")
         .flags(Stats::pdf)
         ;
+
+    stat_com_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:count")
+        .desc("Number of instructions committed")
+        .flags(total)
+        ;
+
+    stat_com_swp
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:swp_count")
+        .desc("Number of s/w prefetches committed")
+        .flags(total)
+        ;
+
+    stat_com_refs
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:refs")
+        .desc("Number of memory references committed")
+        .flags(total)
+        ;
+
+    stat_com_loads
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:loads")
+        .desc("Number of loads committed")
+        .flags(total)
+        ;
+
+    stat_com_membars
+        .init(cpu->number_of_threads)
+        .name(name() +  ".COM:membars")
+        .desc("Number of memory barriers committed")
+        .flags(total)
+        ;
+
+    stat_com_branches
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:branches")
+        .desc("Number of branches committed")
+        .flags(total)
+        ;
+
+    //
+    //  Commit-Eligible instructions...
+    //
+    //  -> The number of instructions eligible to commit in those
+    //  cycles where we reached our commit BW limit (less the number
+    //  actually committed)
+    //
+    //  -> The average value is computed over ALL CYCLES... not just
+    //  the BW limited cycles
+    //
+    //  -> The standard deviation is computed only over cycles where
+    //  we reached the BW limit
+    //
+    commit_eligible
+        .init(cpu->number_of_threads)
+        .name(name() + ".COM:bw_limited")
+        .desc("number of insts not committed due to BW limits")
+        .flags(total)
+        ;
+
+    commit_eligible_samples
+        .name(name() + ".COM:bw_lim_events")
+        .desc("number cycles where commit BW limit reached")
+        ;
 }
 
 template <class Impl>
@@ -1060,9 +1130,7 @@ head_inst->isWriteBarrier())*/
         return false;
     }
 
-    if (head_inst->isControl()) {
-        ++commitCommittedBranches;
-    }
+    updateComInstStats(head_inst);
 
     // Now that the instruction is going to be committed, finalize its
     // trace data.
@@ -1186,6 +1254,47 @@ DefaultCommit<Impl>::robDoneSquashing()
     return true;
 }
 
+template <class Impl>
+void
+DefaultCommit<Impl>::updateComInstStats(DynInstPtr &inst)
+{
+    unsigned thread = inst->threadNumber;
+
+    //
+    //  Pick off the software prefetches
+    //
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch()) {
+        stat_com_swp[thread]++;
+    } else {
+        stat_com_inst[thread]++;
+    }
+#else
+    stat_com_inst[thread]++;
+#endif
+
+    //
+    //  Control Instructions
+    //
+    if (inst->isControl())
+        stat_com_branches[thread]++;
+
+    //
+    //  Memory references
+    //
+    if (inst->isMemRef()) {
+        stat_com_refs[thread]++;
+
+        if (inst->isLoad()) {
+            stat_com_loads[thread]++;
+        }
+    }
+
+    if (inst->isMemBarrier()) {
+        stat_com_membars[thread]++;
+    }
+}
+
 ////////////////////////////////////////
 //                                    //
 //   SMT COMMIT POLICY MAITAINED HERE //
index f0f3f274504ffcaa7be098613d07f99bc3992961..f0b15cb863634fad82d1e14a174f18076c58311a 100644 (file)
@@ -370,6 +370,7 @@ class DefaultFetch
     Stats::Scalar<> icacheStallCycles;
     /** Stat for total number of fetched instructions. */
     Stats::Scalar<> fetchedInsts;
+    Stats::Scalar<> fetchedBranches;
     /** Stat for total number of predicted branches. */
     Stats::Scalar<> predictedBranches;
     /** Stat for total number of cycles spent fetching. */
@@ -383,6 +384,8 @@ class DefaultFetch
     Stats::Scalar<> fetchBlockedCycles;
     /** Stat for total number of fetched cache lines. */
     Stats::Scalar<> fetchedCacheLines;
+
+    Stats::Scalar<> fetchIcacheSquashes;
     /** Distribution of number of instructions fetched each cycle. */
     Stats::Distribution<> fetchNisnDist;
     Stats::Formula idleRate;
index 7abc5733fee29e4b637846e9b7551586e01c4a65..563a767dfedf3c0539a569bbc268aa6b5ef78e0d 100644 (file)
@@ -178,6 +178,11 @@ DefaultFetch<Impl>::regStats()
         .desc("Number of instructions fetch has processed")
         .prereq(fetchedInsts);
 
+    fetchedBranches
+        .name(name() + ".fetchedBranches")
+        .desc("Number of branches that fetch encountered")
+        .prereq(fetchedBranches);
+
     predictedBranches
         .name(name() + ".predictedBranches")
         .desc("Number of branches that fetch has predicted taken")
@@ -209,6 +214,11 @@ DefaultFetch<Impl>::regStats()
         .desc("Number of cache lines fetched")
         .prereq(fetchedCacheLines);
 
+    fetchIcacheSquashes
+        .name(name() + ".fetchIcacheSquashes")
+        .desc("Number of outstanding Icache misses that were squashed")
+        .prereq(fetchIcacheSquashes);
+
     fetchNisnDist
         .init(/* base value */ 0,
               /* last value */ fetchWidth,
@@ -322,8 +332,10 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
     // Can keep track of how many cache accesses go unused due to
     // misspeculation here.
     if (fetchStatus[tid] != IcacheMissStall ||
-        req != memReq[tid])
+        req != memReq[tid]) {
+        ++fetchIcacheSquashes;
         return;
+    }
 
     // Wake up the CPU (if it went to sleep and was waiting on this completion
     // event).
@@ -400,6 +412,8 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC)
 
     predict_taken = branchPred.predict(inst, next_PC, inst->threadNumber);
 
+    ++fetchedBranches;
+
     if (predict_taken) {
         ++predictedBranches;
     }
@@ -457,6 +471,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
     // If translation was successful, attempt to read the first
     // instruction.
     if (fault == NoFault) {
+#if FULL_SYSTEM
         if (cpu->system->memctrl->badaddr(memReq[tid]->paddr)) {
             DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a "
                     "misspeculating path!",
@@ -464,6 +479,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
             ret_fault = TheISA::genMachineCheckFault();
             return false;
         }
+#endif
 
         DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
         fault = cpu->mem->read(memReq[tid], cacheData[tid]);
@@ -480,6 +496,8 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
 
             MemAccessResult result = icacheInterface->access(memReq[tid]);
 
+            fetchedCacheLines++;
+
             // If the cache missed, then schedule an event to wake
             // up this stage once the cache miss completes.
             // @todo: Possibly allow for longer than 1 cycle cache hits.
@@ -499,8 +517,6 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
                         "read.\n", tid);
 
 //                memcpy(cacheData[tid], memReq[tid]->data, memReq[tid]->size);
-
-                fetchedCacheLines++;
             }
         } else {
             DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
@@ -889,10 +905,14 @@ DefaultFetch<Impl>::fetch(bool &status_change)
         if (!fetch_success)
             return;
     } else {
-        if (fetchStatus[tid] == Blocked) {
+        if (fetchStatus[tid] == Idle) {
+            ++fetchIdleCycles;
+        } else if (fetchStatus[tid] == Blocked) {
             ++fetchBlockedCycles;
         } else if (fetchStatus[tid] == Squashing) {
             ++fetchSquashCycles;
+        } else if (fetchStatus[tid] == IcacheMissStall) {
+            ++icacheStallCycles;
         }
 
         // Status is Idle, Squashing, Blocked, or IcacheMissStall, so
@@ -904,6 +924,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
 
     // If we had a stall due to an icache miss, then return.
     if (fetchStatus[tid] == IcacheMissStall) {
+        ++icacheStallCycles;
         status_change = true;
         return;
     }
index e5583781229e1244f724853353ab919f20ba99ed..58cd68b2190c2ab1861dbbafa7c0f1314245a3ef 100644 (file)
@@ -278,6 +278,8 @@ class DefaultIEW
     void tick();
 
   private:
+    void updateExeInstStats(DynInstPtr &inst);
+
     /** Pointer to main time buffer used for backwards communication. */
     TimeBuffer<TimeStruct> *timeBuffer;
 
@@ -443,9 +445,9 @@ class DefaultIEW
     /** Stat for total number of executed instructions. */
     Stats::Scalar<> iewExecutedInsts;
     /** Stat for total number of executed load instructions. */
-    Stats::Scalar<> iewExecLoadInsts;
+    Stats::Vector<> iewExecLoadInsts;
     /** Stat for total number of executed store instructions. */
-    Stats::Scalar<> iewExecStoreInsts;
+//    Stats::Scalar<> iewExecStoreInsts;
     /** Stat for total number of squashed instructions skipped at execute. */
     Stats::Scalar<> iewExecSquashedInsts;
     /** Stat for total number of memory ordering violation events. */
@@ -456,6 +458,33 @@ class DefaultIEW
     Stats::Scalar<> predictedNotTakenIncorrect;
     /** Stat for total number of mispredicted branches detected at execute. */
     Stats::Formula branchMispredicts;
+
+    Stats::Vector<> exe_swp;
+    Stats::Vector<> exe_nop;
+    Stats::Vector<> exe_refs;
+    Stats::Vector<> exe_branches;
+
+//    Stats::Vector<> issued_ops;
+/*
+    Stats::Vector<> stat_fu_busy;
+    Stats::Vector2d<> stat_fuBusy;
+    Stats::Vector<> dist_unissued;
+    Stats::Vector2d<> stat_issued_inst_type;
+*/
+    Stats::Formula issue_rate;
+    Stats::Formula iewExecStoreInsts;
+//    Stats::Formula issue_op_rate;
+//    Stats::Formula fu_busy_rate;
+
+    Stats::Vector<> iewInstsToCommit;
+    Stats::Vector<> writeback_count;
+    Stats::Vector<> producer_inst;
+    Stats::Vector<> consumer_inst;
+    Stats::Vector<> wb_penalized;
+
+    Stats::Formula wb_rate;
+    Stats::Formula wb_fanout;
+    Stats::Formula wb_penalized_rate;
 };
 
 #endif // __CPU_O3_IEW_HH__
index 21eb7dcf8d5d603b25cb3eaeaf26364caf9aa23c..2ae2e13611175e7f945bc872be68fa1f8f9f1607 100644 (file)
@@ -140,6 +140,8 @@ template <class Impl>
 void
 DefaultIEW<Impl>::regStats()
 {
+    using namespace Stats;
+
     instQueue.regStats();
 
     //ldstQueue.regStats();
@@ -195,13 +197,15 @@ DefaultIEW<Impl>::regStats()
         .desc("Number of executed instructions");
 
     iewExecLoadInsts
+        .init(cpu->number_of_threads)
         .name(name() + ".iewExecLoadInsts")
-        .desc("Number of load instructions executed");
-
+        .desc("Number of load instructions executed")
+        .flags(total);
+/*
     iewExecStoreInsts
         .name(name() + ".iewExecStoreInsts")
         .desc("Number of store instructions executed");
-
+*/
     iewExecSquashedInsts
         .name(name() + ".iewExecSquashedInsts")
         .desc("Number of squashed instructions skipped in execute");
@@ -223,6 +227,116 @@ DefaultIEW<Impl>::regStats()
         .desc("Number of branch mispredicts detected at execute");
 
     branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
+
+    exe_swp
+        .init(cpu->number_of_threads)
+        .name(name() + ".EXEC:swp")
+        .desc("number of swp insts executed")
+        .flags(total)
+        ;
+
+    exe_nop
+        .init(cpu->number_of_threads)
+        .name(name() + ".EXEC:nop")
+        .desc("number of nop insts executed")
+        .flags(total)
+        ;
+
+    exe_refs
+        .init(cpu->number_of_threads)
+        .name(name() + ".EXEC:refs")
+        .desc("number of memory reference insts executed")
+        .flags(total)
+        ;
+
+    exe_branches
+        .init(cpu->number_of_threads)
+        .name(name() + ".EXEC:branches")
+        .desc("Number of branches executed")
+        .flags(total)
+        ;
+
+    issue_rate
+        .name(name() + ".EXEC:rate")
+        .desc("Inst execution rate")
+        .flags(total)
+        ;
+    issue_rate = iewExecutedInsts / cpu->numCycles;
+
+    iewExecStoreInsts
+        .name(name() + ".EXEC:stores")
+        .desc("Number of stores executed")
+        .flags(total)
+        ;
+    iewExecStoreInsts = exe_refs - iewExecLoadInsts;
+/*
+    for (int i=0; i<Num_OpClasses; ++i) {
+        stringstream subname;
+        subname << opClassStrings[i] << "_delay";
+        issue_delay_dist.subname(i, subname.str());
+    }
+*/
+    //
+    //  Other stats
+    //
+
+    iewInstsToCommit
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:sent")
+        .desc("cumulative count of insts sent to commit")
+        .flags(total)
+        ;
+
+    writeback_count
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:count")
+        .desc("cumulative count of insts written-back")
+        .flags(total)
+        ;
+
+    producer_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:producers")
+        .desc("num instructions producing a value")
+        .flags(total)
+        ;
+
+    consumer_inst
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:consumers")
+        .desc("num instructions consuming a value")
+        .flags(total)
+        ;
+
+    wb_penalized
+        .init(cpu->number_of_threads)
+        .name(name() + ".WB:penalized")
+        .desc("number of instrctions required to write to 'other' IQ")
+        .flags(total)
+        ;
+
+    wb_penalized_rate
+        .name(name() + ".WB:penalized_rate")
+        .desc ("fraction of instructions written-back that wrote to 'other' IQ")
+        .flags(total)
+        ;
+
+    wb_penalized_rate = wb_penalized / writeback_count;
+
+    wb_fanout
+        .name(name() + ".WB:fanout")
+        .desc("average fanout of values written-back")
+        .flags(total)
+        ;
+
+    wb_fanout = producer_inst / consumer_inst;
+
+    wb_rate
+        .name(name() + ".WB:rate")
+        .desc("insts written-back per cycle")
+        .flags(total)
+        ;
+    wb_rate = writeback_count / cpu->numCycles;
 }
 
 template<class Impl>
@@ -990,6 +1104,8 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
 
             instQueue.advanceTail(inst);
 
+            exe_nop[tid]++;
+
             add_to_iq = false;
         } else if (inst->isExecuted()) {
             assert(0 && "Instruction shouldn't be executed.\n");
@@ -1124,11 +1240,11 @@ DefaultIEW<Impl>::executeInsts()
                 // event adds the instruction to the queue to commit
                 fault = ldstQueue.executeLoad(inst);
 
-                ++iewExecLoadInsts;
+//                ++iewExecLoadInsts;
             } else if (inst->isStore()) {
                 ldstQueue.executeStore(inst);
 
-                ++iewExecStoreInsts;
+//                ++iewExecStoreInsts;
 
                 // If the store had a fault then it may not have a mem req
                 if (inst->req && !(inst->req->flags & LOCKED)) {
@@ -1146,13 +1262,13 @@ DefaultIEW<Impl>::executeInsts()
         } else {
             inst->execute();
 
-            ++iewExecutedInsts;
-
             inst->setExecuted();
 
             instToCommit(inst);
         }
 
+        updateExeInstStats(inst);
+
         // Check if branch was correct.  This check happens after the
         // instruction is added to the queue because even if the branch
         // is mispredicted, the branch instruction itself is still valid.
@@ -1243,17 +1359,20 @@ DefaultIEW<Impl>::writebackInsts()
     for (int inst_num = 0; inst_num < issueWidth &&
              toCommit->insts[inst_num]; inst_num++) {
         DynInstPtr inst = toCommit->insts[inst_num];
+        int tid = inst->threadNumber;
 
         DPRINTF(IEW, "Sending instructions to commit, PC %#x.\n",
                 inst->readPC());
 
+        iewInstsToCommit[tid]++;
+
         // Some instructions will be sent to commit without having
         // executed because they need commit to handle them.
         // E.g. Uncached loads have not actually executed when they
         // are first sent to commit.  Instead commit must tell the LSQ
         // when it's ready to execute the uncached load.
         if (!inst->isSquashed() && inst->isExecuted()) {
-            instQueue.wakeDependents(inst);
+            int dependents = instQueue.wakeDependents(inst);
 
             for (int i = 0; i < inst->numDestRegs(); i++) {
                 //mark as Ready
@@ -1261,6 +1380,10 @@ DefaultIEW<Impl>::writebackInsts()
                         inst->renamedDestRegIdx(i));
                 scoreboard->setReg(inst->renamedDestRegIdx(i));
             }
+
+            producer_inst[tid]++;
+            consumer_inst[tid]+= dependents;
+            writeback_count[tid]++;
         }
     }
 }
@@ -1390,3 +1513,39 @@ DefaultIEW<Impl>::tick()
         cpu->activityThisCycle();
     }
 }
+
+template <class Impl>
+void
+DefaultIEW<Impl>::updateExeInstStats(DynInstPtr &inst)
+{
+    int thread_number = inst->threadNumber;
+
+    //
+    //  Pick off the software prefetches
+    //
+#ifdef TARGET_ALPHA
+    if (inst->isDataPrefetch())
+        exe_swp[thread_number]++;
+    else
+        iewExecutedInsts++;
+#else
+    iewExecutedInsts[thread_number]++;
+#endif
+
+    //
+    //  Control operations
+    //
+    if (inst->isControl())
+        exe_branches[thread_number]++;
+
+    //
+    //  Memory operations
+    //
+    if (inst->isMemRef()) {
+        exe_refs[thread_number]++;
+
+        if (inst->isLoad()) {
+            iewExecLoadInsts[thread_number]++;
+        }
+    }
+}
index 283bbdc22e6425909bd838e01a13fc1513936d64..06d9937f2960ddcab6d1f38c7d585853a6c71625 100644 (file)
@@ -185,7 +185,7 @@ class InstructionQueue
     void commit(const InstSeqNum &inst, unsigned tid = 0);
 
     /** Wakes all dependents of a completed instruction. */
-    void wakeDependents(DynInstPtr &completed_inst);
+    int wakeDependents(DynInstPtr &completed_inst);
 
     /** Adds a ready memory instruction to the ready list. */
     void addReadyMemInst(DynInstPtr &ready_inst);
@@ -479,6 +479,7 @@ class InstructionQueue
     /** Stat for number of non-speculative instructions added. */
     Stats::Scalar<> iqNonSpecInstsAdded;
 //    Stats::Scalar<> iqIntInstsAdded;
+    Stats::Scalar<> iqInstsIssued;
     /** Stat for number of integer instructions issued. */
     Stats::Scalar<> iqIntInstsIssued;
 //    Stats::Scalar<> iqFloatInstsAdded;
@@ -505,6 +506,20 @@ class InstructionQueue
      */
     Stats::Scalar<> iqSquashedNonSpecRemoved;
 
+    Stats::VectorDistribution<> queue_res_dist;
+    Stats::Vector<> n_issued_dist;
+    Stats::VectorDistribution<> issue_delay_dist;
+
+    Stats::Vector<> stat_fu_busy;
+//    Stats::Vector<> dist_unissued;
+    Stats::Vector2d<> stat_issued_inst_type;
+
+    Stats::Formula issue_rate;
+//    Stats::Formula issue_stores;
+//    Stats::Formula issue_op_rate;
+    Stats::Vector<> fu_busy;  //cumulative fu busy
+
+    Stats::Formula fu_busy_rate;
 };
 
 #endif //__CPU_O3_INST_QUEUE_HH__
index cfdd25cd5cb43b1f376ef764d2b8a97f14bad683..804bc2472283c231ff7ae4cf5d0c51aae8e1a18b 100644 (file)
@@ -224,6 +224,7 @@ template <class Impl>
 void
 InstructionQueue<Impl>::regStats()
 {
+    using namespace Stats;
     iqInstsAdded
         .name(name() + ".iqInstsAdded")
         .desc("Number of instructions added to the IQ (excludes non-spec)")
@@ -236,6 +237,11 @@ InstructionQueue<Impl>::regStats()
 
 //    iqIntInstsAdded;
 
+    iqInstsIssued
+        .name(name() + ".iqInstsIssued")
+        .desc("Number of instructions issued")
+        .prereq(iqInstsIssued);
+
     iqIntInstsIssued
         .name(name() + ".iqIntInstsIssued")
         .desc("Number of integer instructions issued")
@@ -291,6 +297,103 @@ InstructionQueue<Impl>::regStats()
         .desc("Number of squashed non-spec instructions that were removed")
         .prereq(iqSquashedNonSpecRemoved);
 
+    queue_res_dist
+        .init(Num_OpClasses, 0, 99, 2)
+        .name(name() + ".IQ:residence:")
+        .desc("cycles from dispatch to issue")
+        .flags(total | pdf | cdf )
+        ;
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        queue_res_dist.subname(i, opClassStrings[i]);
+    }
+    n_issued_dist
+        .init(totalWidth + 1)
+        .name(name() + ".ISSUE:issued_per_cycle")
+        .desc("Number of insts issued each cycle")
+        .flags(total | pdf | dist)
+        ;
+/*
+    dist_unissued
+        .init(Num_OpClasses+2)
+        .name(name() + ".ISSUE:unissued_cause")
+        .desc("Reason ready instruction not issued")
+        .flags(pdf | dist)
+        ;
+    for (int i=0; i < (Num_OpClasses + 2); ++i) {
+        dist_unissued.subname(i, unissued_names[i]);
+    }
+*/
+    stat_issued_inst_type
+        .init(numThreads,Num_OpClasses)
+        .name(name() + ".ISSUE:FU_type")
+        .desc("Type of FU issued")
+        .flags(total | pdf | dist)
+        ;
+    stat_issued_inst_type.ysubnames(opClassStrings);
+
+    //
+    //  How long did instructions for a particular FU type wait prior to issue
+    //
+
+    issue_delay_dist
+        .init(Num_OpClasses,0,99,2)
+        .name(name() + ".ISSUE:")
+        .desc("cycles from operands ready to issue")
+        .flags(pdf | cdf)
+        ;
+
+    for (int i=0; i<Num_OpClasses; ++i) {
+        stringstream subname;
+        subname << opClassStrings[i] << "_delay";
+        issue_delay_dist.subname(i, subname.str());
+    }
+
+    issue_rate
+        .name(name() + ".ISSUE:rate")
+        .desc("Inst issue rate")
+        .flags(total)
+        ;
+    issue_rate = iqInstsIssued / cpu->numCycles;
+/*
+    issue_stores
+        .name(name() + ".ISSUE:stores")
+        .desc("Number of stores issued")
+        .flags(total)
+        ;
+    issue_stores = exe_refs - exe_loads;
+*/
+/*
+    issue_op_rate
+        .name(name() + ".ISSUE:op_rate")
+        .desc("Operation issue rate")
+        .flags(total)
+        ;
+    issue_op_rate = issued_ops / numCycles;
+*/
+    stat_fu_busy
+        .init(Num_OpClasses)
+        .name(name() + ".ISSUE:fu_full")
+        .desc("attempts to use FU when none available")
+        .flags(pdf | dist)
+        ;
+    for (int i=0; i < Num_OpClasses; ++i) {
+        stat_fu_busy.subname(i, opClassStrings[i]);
+    }
+
+    fu_busy
+        .init(numThreads)
+        .name(name() + ".ISSUE:fu_busy_cnt")
+        .desc("FU busy when requested")
+        .flags(total)
+        ;
+
+    fu_busy_rate
+        .name(name() + ".ISSUE:fu_busy_rate")
+        .desc("FU busy rate (busy events/executed inst)")
+        .flags(total)
+        ;
+    fu_busy_rate = fu_busy / iqInstsIssued;
+
     for ( int i=0; i < numThreads; i++) {
         // Tell mem dependence unit to reg stats as well.
         memDepUnit[i].regStats();
@@ -658,6 +761,8 @@ InstructionQueue<Impl>::scheduleReadyInsts()
 
         int idx = fuPool->getUnit(op_class);
 
+        int tid = issuing_inst->threadNumber;
+
         if (idx == -2) {
             assert(op_class == No_OpClass);
 
@@ -666,7 +771,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
 
             DPRINTF(IQ, "Thread %i: Issuing instruction PC that needs no FU"
                     " %#x [sn:%lli]\n",
-                    issuing_inst->threadNumber, issuing_inst->readPC(),
+                    tid, issuing_inst->readPC(),
                     issuing_inst->seqNum);
 
             readyInsts[op_class].pop();
@@ -685,14 +790,15 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 // Memory instructions can not be freed from the IQ until they
                 // complete.
                 ++freeEntries;
-                count[issuing_inst->threadNumber]--;
+                count[tid]--;
                 issuing_inst->removeInIQ();
             } else {
-                memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+                memDepUnit[tid].issue(issuing_inst);
             }
 
             listOrder.erase(order_it++);
 
+            stat_issued_inst_type[tid][op_class]++;
         } else if (idx != -1) {
             int op_latency = fuPool->getOpLatency(op_class);
 
@@ -722,7 +828,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
 
             DPRINTF(IQ, "Thread %i: Issuing instruction PC %#x "
                     "[sn:%lli]\n",
-                    issuing_inst->threadNumber, issuing_inst->readPC(),
+                    tid, issuing_inst->readPC(),
                     issuing_inst->seqNum);
 
             readyInsts[op_class].pop();
@@ -741,14 +847,17 @@ InstructionQueue<Impl>::scheduleReadyInsts()
                 // Memory instructions can not be freed from the IQ until they
                 // complete.
                 ++freeEntries;
-                count[issuing_inst->threadNumber]--;
+                count[tid]--;
                 issuing_inst->removeInIQ();
             } else {
-                memDepUnit[issuing_inst->threadNumber].issue(issuing_inst);
+                memDepUnit[tid].issue(issuing_inst);
             }
 
             listOrder.erase(order_it++);
+            stat_issued_inst_type[tid][op_class]++;
         } else {
+            stat_fu_busy[op_class]++;
+            fu_busy[tid]++;
             ++order_it;
         }
     }
@@ -808,9 +917,11 @@ InstructionQueue<Impl>::commit(const InstSeqNum &inst, unsigned tid)
 }
 
 template <class Impl>
-void
+int
 InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
 {
+    int dependents = 0;
+
     DPRINTF(IQ, "Waking dependents of completed instruction.\n");
 
     assert(!completed_inst->isSquashed());
@@ -875,6 +986,8 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
             curr = prev->next;
             prev->inst = NULL;
 
+            ++dependents;
+
             delete prev;
         }
 
@@ -886,6 +999,7 @@ InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
         // Mark the scoreboard as having that register ready.
         regScoreboard[dest_reg] = true;
     }
+    return dependents;
 }
 
 template <class Impl>
index d5beccde9bdcd7e0fc484b45de0a58de3ac91d68..c6f8f97aa2fa26ebb9ba2b750dc7884e4cb0ad4a 100644 (file)
@@ -90,7 +90,7 @@ class DefaultRename
         Squashing,
         Blocked,
         Unblocking,
-        BarrierStall
+        SerializeStall
     };
 
   private:
@@ -359,8 +359,8 @@ class DefaultRename
     /** Tracks which stages are telling decode to stall. */
     Stalls stalls[Impl::MaxThreads];
 
-    /** The barrier instruction that rename has stalled on. */
-    DynInstPtr barrierInst[Impl::MaxThreads];
+    /** The serialize instruction that rename has stalled on. */
+    DynInstPtr serializeInst[Impl::MaxThreads];
 
     /** Records if rename needs to serialize on the next instruction for any
      * thread.
@@ -419,8 +419,8 @@ class DefaultRename
     Stats::Scalar<> renameIdleCycles;
     /** Stat for total number of cycles spent blocking. */
     Stats::Scalar<> renameBlockCycles;
-    /** Stat for total number of cycles spent stalling for a barrier. */
-    Stats::Scalar<> renameBarrierCycles;
+    /** Stat for total number of cycles spent stalling for a serializing inst. */
+    Stats::Scalar<> renameSerializeStallCycles;
     /** Stat for total number of cycles spent running normally. */
     Stats::Scalar<> renameRunCycles;
     /** Stat for total number of cycles spent unblocking. */
@@ -446,6 +446,8 @@ class DefaultRename
     Stats::Scalar<> renameCommittedMaps;
     /** Stat for total number of mappings that were undone due to a squash. */
     Stats::Scalar<> renameUndoneMaps;
+    Stats::Scalar<> renamedSerializing;
+    Stats::Scalar<> renamedTempSerializing;
 };
 
 #endif // __CPU_O3_RENAME_HH__
index 441118ef1e69cf5e47d8234cb02b4b842111b9e5..e2921192118bd0146e62035cac96f22d8037dcb0 100644 (file)
@@ -53,7 +53,7 @@ DefaultRename<Impl>::DefaultRename(Params *params)
 
         stalls[i].iew = false;
         stalls[i].commit = false;
-        barrierInst[i] = NULL;
+        serializeInst[i] = NULL;
 
         instsInProgress[i] = 0;
 
@@ -78,69 +78,79 @@ void
 DefaultRename<Impl>::regStats()
 {
     renameSquashCycles
-        .name(name() + ".renameSquashCycles")
+        .name(name() + ".RENAME:SquashCycles")
         .desc("Number of cycles rename is squashing")
         .prereq(renameSquashCycles);
     renameIdleCycles
-        .name(name() + ".renameIdleCycles")
+        .name(name() + ".RENAME:IdleCycles")
         .desc("Number of cycles rename is idle")
         .prereq(renameIdleCycles);
     renameBlockCycles
-        .name(name() + ".renameBlockCycles")
+        .name(name() + ".RENAME:BlockCycles")
         .desc("Number of cycles rename is blocking")
         .prereq(renameBlockCycles);
-    renameBarrierCycles
-        .name(name() + ".renameBarrierCycles")
-        .desc("Number of cycles rename is blocking due to a barrier stall")
-        .prereq(renameBarrierCycles);
+    renameSerializeStallCycles
+        .name(name() + ".RENAME:serializeStallCycles")
+        .desc("count of cycles rename stalled for serializing inst")
+        .flags(Stats::total);
     renameRunCycles
-        .name(name() + ".renameRunCycles")
+        .name(name() + ".RENAME:RunCycles")
         .desc("Number of cycles rename is running")
         .prereq(renameIdleCycles);
     renameUnblockCycles
-        .name(name() + ".renameUnblockCycles")
+        .name(name() + ".RENAME:UnblockCycles")
         .desc("Number of cycles rename is unblocking")
         .prereq(renameUnblockCycles);
     renameRenamedInsts
-        .name(name() + ".renameRenamedInsts")
+        .name(name() + ".RENAME:RenamedInsts")
         .desc("Number of instructions processed by rename")
         .prereq(renameRenamedInsts);
     renameSquashedInsts
-        .name(name() + ".renameSquashedInsts")
+        .name(name() + ".RENAME:SquashedInsts")
         .desc("Number of squashed instructions processed by rename")
         .prereq(renameSquashedInsts);
     renameROBFullEvents
-        .name(name() + ".renameROBFullEvents")
+        .name(name() + ".RENAME:ROBFullEvents")
         .desc("Number of times rename has blocked due to ROB full")
         .prereq(renameROBFullEvents);
     renameIQFullEvents
-        .name(name() + ".renameIQFullEvents")
+        .name(name() + ".RENAME:IQFullEvents")
         .desc("Number of times rename has blocked due to IQ full")
         .prereq(renameIQFullEvents);
     renameLSQFullEvents
-        .name(name() + ".renameLSQFullEvents")
+        .name(name() + ".RENAME:LSQFullEvents")
         .desc("Number of times rename has blocked due to LSQ full")
         .prereq(renameLSQFullEvents);
     renameFullRegistersEvents
-        .name(name() + ".renameFullRegisterEvents")
+        .name(name() + ".RENAME:FullRegisterEvents")
         .desc("Number of times there has been no free registers")
         .prereq(renameFullRegistersEvents);
     renameRenamedOperands
-        .name(name() + ".renameRenamedOperands")
+        .name(name() + ".RENAME:RenamedOperands")
         .desc("Number of destination operands rename has renamed")
         .prereq(renameRenamedOperands);
     renameRenameLookups
-        .name(name() + ".renameRenameLookups")
+        .name(name() + ".RENAME:RenameLookups")
         .desc("Number of register rename lookups that rename has made")
         .prereq(renameRenameLookups);
     renameCommittedMaps
-        .name(name() + ".renameCommittedMaps")
+        .name(name() + ".RENAME:CommittedMaps")
         .desc("Number of HB maps that are committed")
         .prereq(renameCommittedMaps);
     renameUndoneMaps
-        .name(name() + ".renameUndoneMaps")
+        .name(name() + ".RENAME:UndoneMaps")
         .desc("Number of HB maps that are undone due to squashing")
         .prereq(renameUndoneMaps);
+    renamedSerializing
+        .name(name() + ".RENAME:serializingInsts")
+        .desc("count of serializing insts renamed")
+        .flags(Stats::total)
+        ;
+    renamedTempSerializing
+        .name(name() + ".RENAME:tempSerializingInsts")
+        .desc("count of temporary serializing insts renamed")
+        .flags(Stats::total)
+        ;
 }
 
 template <class Impl>
@@ -254,7 +264,7 @@ DefaultRename<Impl>::squash(unsigned tid)
     // cycle and there should be space to hold everything due to the squash.
     if (renameStatus[tid] == Blocked ||
         renameStatus[tid] == Unblocking ||
-        renameStatus[tid] == BarrierStall) {
+        renameStatus[tid] == SerializeStall) {
 #if !FULL_SYSTEM
         // In syscall emulation, we can have both a block and a squash due
         // to a syscall in the same cycle.  This would cause both signals to
@@ -267,7 +277,7 @@ DefaultRename<Impl>::squash(unsigned tid)
 #else
         toDecode->renameUnblock[tid] = 1;
 #endif
-        barrierInst[tid] = NULL;
+        serializeInst[tid] = NULL;
     }
 
     // Set the status to Squashing.
@@ -370,8 +380,8 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
         ++renameBlockCycles;
     } else if (renameStatus[tid] == Squashing) {
         ++renameSquashCycles;
-    } else if (renameStatus[tid] == BarrierStall) {
-        ++renameBarrierCycles;
+    } else if (renameStatus[tid] == SerializeStall) {
+        ++renameSerializeStallCycles;
     }
 
     if (renameStatus[tid] == Running ||
@@ -535,14 +545,18 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
         if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
             DPRINTF(Rename, "Serialize before instruction encountered.\n");
 
-            if (!inst->isTempSerializeBefore())
+            if (!inst->isTempSerializeBefore()) {
+                renamedSerializing++;
                 inst->setSerializeHandled();
+            } else {
+                renamedTempSerializing++;
+            }
 
-            // Change status over to BarrierStall so that other stages know
+            // Change status over to SerializeStall so that other stages know
             // what this is blocked on.
-            renameStatus[tid] = BarrierStall;
+            renameStatus[tid] = SerializeStall;
 
-            barrierInst[tid] = inst;
+            serializeInst[tid] = inst;
 
             blockThisCycle = true;
 
@@ -716,9 +730,9 @@ DefaultRename<Impl>::block(unsigned tid)
             wroteToTimeBuffer = true;
         }
 
-        // Rename can not go from BarrierStall to Blocked, otherwise it would
-        // not know to complete the barrier stall.
-        if (renameStatus[tid] != BarrierStall) {
+        // Rename can not go from SerializeStall to Blocked, otherwise it would
+        // not know to complete the serialize stall.
+        if (renameStatus[tid] != SerializeStall) {
             // Set status to Blocked.
             renameStatus[tid] = Blocked;
             return true;
@@ -735,7 +749,7 @@ DefaultRename<Impl>::unblock(unsigned tid)
     DPRINTF(Rename, "[tid:%u]: Trying to unblock.\n", tid);
 
     // Rename is done unblocking if the skid buffer is empty.
-    if (skidBuffer[tid].empty() && renameStatus[tid] != BarrierStall) {
+    if (skidBuffer[tid].empty() && renameStatus[tid] != SerializeStall) {
 
         DPRINTF(Rename, "[tid:%u]: Done unblocking.\n", tid);
 
@@ -1008,9 +1022,9 @@ DefaultRename<Impl>::checkStall(unsigned tid)
     } else if (renameMap[tid]->numFreeEntries() <= 0) {
         DPRINTF(Rename,"[tid:%i]: Stall: RenameMap has 0 free entries.\n", tid);
         ret_val = true;
-    } else if (renameStatus[tid] == BarrierStall &&
+    } else if (renameStatus[tid] == SerializeStall &&
                (!emptyROB[tid] || instsInProgress[tid])) {
-        DPRINTF(Rename,"[tid:%i]: Stall: Barrier stall and ROB is not "
+        DPRINTF(Rename,"[tid:%i]: Stall: Serialize stall and ROB is not "
                 "empty.\n",
                 tid);
         ret_val = true;
@@ -1064,7 +1078,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
     //         if so then go to unblocking
     // If status was Squashing
     //     check if squashing is not high.  Switch to running this cycle.
-    // If status was barrier stall
+    // If status was serialize stall
     //     check if ROB is empty and no insts are in flight to the ROB
 
     readFreeEntries(tid);
@@ -1113,12 +1127,12 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
         return false;
     }
 
-    if (renameStatus[tid] == BarrierStall) {
+    if (renameStatus[tid] == SerializeStall) {
         // Stall ends once the ROB is free.
-        DPRINTF(Rename, "[tid:%u]: Done with barrier stall, switching to "
+        DPRINTF(Rename, "[tid:%u]: Done with serialize stall, switching to "
                 "unblocking.\n", tid);
 
-        DynInstPtr barr_inst = barrierInst[tid];
+        DynInstPtr serial_inst = serializeInst[tid];
 
         renameStatus[tid] = Unblocking;
 
@@ -1126,21 +1140,21 @@ DefaultRename<Impl>::checkSignalsAndUpdate(unsigned tid)
 
         DPRINTF(Rename, "[tid:%u]: Processing instruction [%lli] with "
                 "PC %#x.\n",
-                tid, barr_inst->seqNum, barr_inst->readPC());
+                tid, serial_inst->seqNum, serial_inst->readPC());
 
         // Put instruction into queue here.
-        barr_inst->clearSerializeBefore();
+        serial_inst->clearSerializeBefore();
 
         if (!skidBuffer[tid].empty()) {
-            skidBuffer[tid].push_front(barr_inst);
+            skidBuffer[tid].push_front(serial_inst);
         } else {
-            insts[tid].push_front(barr_inst);
+            insts[tid].push_front(serial_inst);
         }
 
         DPRINTF(Rename, "[tid:%u]: Instruction must be processed by rename."
                 " Adding to front of list.", tid);
 
-        barrierInst[tid] = NULL;
+        serializeInst[tid] = NULL;
 
         return true;
     }