Ozone updates.
author Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
committer Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
cpu/ozone/front_end.hh:
cpu/ozone/front_end_impl.hh:
cpu/ozone/lw_back_end.hh:
    Support latency for Ozone FE and BE.
cpu/ozone/lw_back_end_impl.hh:
    Support latency for Ozone FE and BE.

    Also fixes for switching out, profiling.
cpu/ozone/lw_lsq.hh:
cpu/ozone/lw_lsq_impl.hh:
    Fixes for switching out.
cpu/ozone/simple_params.hh:
    Updated parameters.

--HG--
extra : convert_revision : 21d4846a59a2239bfdf8fe92e47fd0972debe4f5

cpu/ozone/front_end.hh
cpu/ozone/front_end_impl.hh
cpu/ozone/lw_back_end.hh
cpu/ozone/lw_back_end_impl.hh
cpu/ozone/lw_lsq.hh
cpu/ozone/lw_lsq_impl.hh
cpu/ozone/simple_params.hh

index dd382491fd5e8508fec587cdb066219b038c7601..b677e667c8fa185f6695a1f571667127c53ad67c 100644 (file)
@@ -31,6 +31,7 @@
 
 #include <deque>
 
+#include "base/timebuf.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/o3/bpred_unit.hh"
 #include "cpu/ozone/rename_table.hh"
@@ -210,15 +211,21 @@ class FrontEnd
     void dumpInsts();
 
   private:
+    TimeBuffer<int> numInstsReady;
+
     typedef typename std::deque<DynInstPtr> InstBuff;
     typedef typename InstBuff::iterator InstBuffIt;
 
+    InstBuff feBuffer;
+
     InstBuff instBuffer;
 
     int instBufferSize;
 
     int maxInstBufferSize;
 
+    int latency;
+
     int width;
 
     int freeRegs;
index ca9948b7dc598e39e167862bd526416666e26c72..09fc2e2f89e07dede523714c35f3ba068bdc8c0d 100644 (file)
@@ -41,8 +41,10 @@ template <class Impl>
 FrontEnd<Impl>::FrontEnd(Params *params)
     : branchPred(params),
       icacheInterface(params->icacheInterface),
+      numInstsReady(params->frontEndLatency, 0),
       instBufferSize(0),
       maxInstBufferSize(params->maxInstBufferSize),
+      latency(params->frontEndLatency),
       width(params->frontEndWidth),
       freeRegs(params->numPhysicalRegs),
       numPhysRegs(params->numPhysicalRegs),
@@ -261,6 +263,18 @@ FrontEnd<Impl>::tick()
     if (switchedOut)
         return;
 
+    for (int insts_to_queue = numInstsReady[-latency];
+         !instBuffer.empty() && insts_to_queue;
+         --insts_to_queue)
+    {
+        DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
+                instBuffer.front()->seqNum);
+        feBuffer.push_back(instBuffer.front());
+        instBuffer.pop_front();
+    }
+
+    numInstsReady.advance();
+
     // @todo: Maybe I want to just have direct communication...
     if (fromCommit->doneSeqNum) {
         branchPred.update(fromCommit->doneSeqNum, 0);
@@ -349,6 +363,7 @@ FrontEnd<Impl>::tick()
         // latency
         instBuffer.push_back(inst);
         ++instBufferSize;
+        numInstsReady[0]++;
         ++num_inst;
 
 #if FULL_SYSTEM
@@ -570,6 +585,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
     instruction->fault = fault;
     instruction->setCanIssue();
     instBuffer.push_back(instruction);
+    numInstsReady[0]++;
     ++instBufferSize;
 }
 
@@ -599,6 +615,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
         freeRegs+= inst->numDestRegs();
     }
 
+    while (!feBuffer.empty() &&
+           feBuffer.back()->seqNum > squash_num) {
+        DynInstPtr inst = feBuffer.back();
+
+        DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+                inst->seqNum, inst->readPC());
+
+        inst->clearDependents();
+
+        feBuffer.pop_back();
+        --instBufferSize;
+
+        freeRegs+= inst->numDestRegs();
+    }
+
     // Copy over rename table from the back end.
     renameTable.copyFrom(backEnd->renameTable);
 
@@ -633,13 +664,13 @@ template <class Impl>
 typename Impl::DynInstPtr
 FrontEnd<Impl>::getInst()
 {
-    if (instBufferSize == 0) {
+    if (feBuffer.empty()) {
         return NULL;
     }
 
-    DynInstPtr inst = instBuffer.front();
+    DynInstPtr inst = feBuffer.front();
 
-    instBuffer.pop_front();
+    feBuffer.pop_front();
 
     --instBufferSize;
 
@@ -857,6 +888,7 @@ FrontEnd<Impl>::doSwitchOut()
     squash(0, 0);
     instBuffer.clear();
     instBufferSize = 0;
+    feBuffer.clear();
     status = Idle;
 }
 
index 19f2b2b6184a2e8dcd1bedb0b39e9f8f46e08cba..4e2f5606c1c9b9e32e396fff0482ee97e72d5d6d 100644 (file)
@@ -78,7 +78,7 @@ class LWBackEnd
     TimeBuffer<IssueToExec> i2e;
     typename TimeBuffer<IssueToExec>::wire instsToExecute;
     TimeBuffer<ExecToCommit> e2c;
-    TimeBuffer<Writeback> numInstsToWB;
+    TimeBuffer<int> numInstsToWB;
 
     TimeBuffer<CommStruct> *comm;
     typename TimeBuffer<CommStruct>::wire toIEW;
@@ -157,7 +157,7 @@ class LWBackEnd
 
     Tick lastCommitCycle;
 
-    bool robEmpty() { return instList.empty(); }
+    bool robEmpty() { return numInsts == 0; }
 
     bool isFull() { return numInsts >= numROBEntries; }
     bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
@@ -212,6 +212,7 @@ class LWBackEnd
     }
 
     void instToCommit(DynInstPtr &inst);
+    void readyInstsForCommit();
 
     void switchOut();
     void doSwitchOut();
@@ -293,12 +294,13 @@ class LWBackEnd
 
     MemReqPtr memReq;
 
+    int latency;
+
     // General back end width. Used if the more specific isn't given.
     int width;
 
     // Dispatch width.
     int dispatchWidth;
-    int numDispatchEntries;
     int dispatchSize;
 
     int waitingInsts;
@@ -323,6 +325,7 @@ class LWBackEnd
 
     int numROBEntries;
     int numInsts;
+    bool lsqLimits;
 
     std::set<InstSeqNum> waitingMemOps;
     typedef std::set<InstSeqNum>::iterator MemIt;
@@ -333,9 +336,6 @@ class LWBackEnd
     InstSeqNum squashSeqNum;
     Addr squashNextPC;
 
-    Fault faultFromFetch;
-    bool fetchHasFault;
-
     bool switchedOut;
     bool switchPending;
 
@@ -359,8 +359,6 @@ class LWBackEnd
     std::list<DynInstPtr> replayList;
     std::list<DynInstPtr> writeback;
 
-    int latency;
-
     int squashLatency;
 
     bool exactFullStall;
@@ -397,9 +395,11 @@ class LWBackEnd
     Stats::Scalar<> lsqInversion;
 
     Stats::Vector<> nIssuedDist;
+/*
     Stats::VectorDistribution<> issueDelayDist;
 
     Stats::VectorDistribution<> queueResDist;
+*/
 /*
     Stats::Vector<> stat_fu_busy;
     Stats::Vector2d<> stat_fuBusy;
@@ -447,7 +447,7 @@ class LWBackEnd
 
     Stats::Vector<>  ROBCount;  // cumulative ROB occupancy
     Stats::Formula ROBOccRate;
-    Stats::VectorDistribution<> ROBOccDist;
+//    Stats::VectorDistribution<> ROBOccDist;
   public:
     void dumpInsts();
 
index 9e1cd28cf13d2e4c4295342368d85484d2e810de..9a6ad4c14c94ba34df9bfb48ea3e22ea876e6e78 100644 (file)
@@ -151,8 +151,10 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
 
 //    iewStage->wakeCPU();
 
-    if (be->isSwitchedOut())
-        return;
+    assert(inst->isSquashed() || !be->isSwitchedOut());
+
+//    if (be->isSwitchedOut() && inst->isLoad())
+//        return;
 
     if (dcacheMiss) {
         be->removeDcacheMiss(inst);
@@ -208,14 +210,14 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
 
 template <class Impl>
 LWBackEnd<Impl>::LWBackEnd(Params *params)
-    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
       trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
-      dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
+      dcacheInterface(params->dcacheInterface), latency(params->backEndLatency),
+      width(params->backEndWidth), lsqLimits(params->lsqLimits),
       exactFullStall(true)
 {
     numROBEntries = params->numROBEntries;
     numInsts = 0;
-    numDispatchEntries = 32;
     maxOutstandingMemOps = params->maxOutstandingMemOps;
     numWaitingMemOps = 0;
     waitingInsts = 0;
@@ -251,6 +253,8 @@ void
 LWBackEnd<Impl>::regStats()
 {
     using namespace Stats;
+    LSQ.regStats();
+
     robCapEvents
         .init(cpu->number_of_threads)
         .name(name() + ".ROB:cap_events")
@@ -377,6 +381,7 @@ LWBackEnd<Impl>::regStats()
         .desc("Number of insts issued each cycle")
         .flags(total | pdf | dist)
         ;
+/*
     issueDelayDist
         .init(Num_OpClasses,0,99,2)
         .name(name() + ".ISSUE:")
@@ -393,7 +398,7 @@ LWBackEnd<Impl>::regStats()
     for (int i = 0; i < Num_OpClasses; ++i) {
         queueResDist.subname(i, opClassStrings[i]);
     }
-
+*/
     writebackCount
         .init(cpu->number_of_threads)
         .name(name() + ".WB:count")
@@ -555,13 +560,14 @@ LWBackEnd<Impl>::regStats()
         .flags(total)
         ;
     ROBOccRate = ROBCount / cpu->numCycles;
-
+/*
     ROBOccDist
         .init(cpu->number_of_threads,0,numROBEntries,2)
         .name(name() + ".ROB:occ_dist")
         .desc("ROB Occupancy per cycle")
         .flags(total | cdf)
         ;
+*/
 }
 
 template <class Impl>
@@ -654,18 +660,22 @@ LWBackEnd<Impl>::tick()
 {
     DPRINTF(BE, "Ticking back end\n");
 
+    // Read in any done instruction information and update the IQ or LSQ.
+    updateStructures();
+
     if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
         cpu->signalSwitched();
         return;
     }
 
+    readyInstsForCommit();
+
+    numInstsToWB.advance();
+
     ROBCount[0]+= numInsts;
 
     wbCycle = 0;
 
-    // Read in any done instruction information and update the IQ or LSQ.
-    updateStructures();
-
 #if FULL_SYSTEM
     checkInterrupts();
 
@@ -740,6 +750,10 @@ LWBackEnd<Impl>::dispatchInsts()
     while (numInsts < numROBEntries &&
            numWaitingMemOps < maxOutstandingMemOps) {
         // Get instruction from front of time buffer
+        if (lsqLimits && LSQ.isFull()) {
+            break;
+        }
+
         DynInstPtr inst = frontEnd->getInst();
         if (!inst) {
             break;
@@ -798,6 +812,7 @@ LWBackEnd<Impl>::dispatchInsts()
                 inst->setIssued();
                 inst->setExecuted();
                 inst->setCanCommit();
+                numInstsToWB[0]++;
             } else {
                 DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
                         "exeList.\n",
@@ -987,16 +1002,10 @@ template<class Impl>
 void
 LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
 {
-
     DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
             inst->seqNum, inst->readPC());
 
     if (!inst->isSquashed()) {
-        DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
-                inst->seqNum, inst->readPC());
-
-        inst->setCanCommit();
-
         if (inst->isExecuted()) {
             inst->setResultReady();
             int dependents = wakeDependents(inst);
@@ -1007,8 +1016,32 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
         }
     }
 
+    writeback.push_back(inst);
+
+    numInstsToWB[0]++;
+
     writebackCount[0]++;
 }
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+    for (int i = numInstsToWB[-latency];
+         !writeback.empty() && i;
+         --i)
+    {
+        DynInstPtr inst = writeback.front();
+        writeback.pop_front();
+        if (!inst->isSquashed()) {
+            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            inst->setCanCommit();
+        }
+    }
+}
+
 #if 0
 template <class Impl>
 void
@@ -1221,6 +1254,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
         ++freed_regs;
     }
 
+#if FULL_SYSTEM
+    if (thread->profile) {
+//        bool usermode =
+//            (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+//        thread->profilePC = usermode ? 1 : inst->readPC();
+        thread->profilePC = inst->readPC();
+        ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
+                                                     inst->staticInst);
+
+        if (node)
+            thread->profileNode = node;
+    }
+#endif
+
     if (inst->traceData) {
         inst->traceData->setFetchSeq(inst->seqNum);
         inst->traceData->setCPSeq(thread->numInst);
@@ -1280,9 +1327,9 @@ LWBackEnd<Impl>::commitInsts()
     while (!instList.empty() && inst_num < commitWidth) {
         if (instList.back()->isSquashed()) {
             instList.back()->clearDependents();
+            ROBSquashedInsts[instList.back()->threadNumber]++;
             instList.pop_back();
             --numInsts;
-            ROBSquashedInsts[instList.back()->threadNumber]++;
             continue;
         }
 
@@ -1304,10 +1351,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
     LSQ.squash(sn);
 
     int freed_regs = 0;
-    InstListIt waiting_list_end = waitingList.end();
+    InstListIt insts_end_it = waitingList.end();
     InstListIt insts_it = waitingList.begin();
 
-    while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+    while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
     {
         if ((*insts_it)->isSquashed()) {
             ++insts_it;
@@ -1333,6 +1380,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
     while (!instList.empty() && (*insts_it)->seqNum > sn)
     {
         if ((*insts_it)->isSquashed()) {
+            panic("Instruction should not be already squashed and on list!");
             ++insts_it;
             continue;
         }
@@ -1364,18 +1412,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
         --numInsts;
     }
 
-    insts_it = waitingList.begin();
-    while (!waitingList.empty() && insts_it != waitingList.end()) {
-        if ((*insts_it)->seqNum < sn) {
-            ++insts_it;
-            continue;
-        }
-        assert((*insts_it)->isSquashed());
-
-        waitingList.erase(insts_it++);
-        waitingInsts--;
-    }
-
     while (memBarrier && memBarrier->seqNum > sn) {
         DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
                 "squashed)\n", memBarrier->seqNum);
@@ -1393,6 +1429,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
         }
     }
 
+    insts_it = replayList.begin();
+    insts_end_it = replayList.end();
+    while (!replayList.empty() && insts_it != insts_end_it) {
+        if ((*insts_it)->seqNum < sn) {
+            ++insts_it;
+            continue;
+        }
+        assert((*insts_it)->isSquashed());
+
+        replayList.erase(insts_it++);
+    }
+
     frontEnd->addFreeRegs(freed_regs);
 }
 
@@ -1463,14 +1511,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
     frontEnd->squash(inst->seqNum - 1, inst->readPC());
 }
 
-template <class Impl>
-void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
-    faultFromFetch = fault;
-    fetchHasFault = true;
-}
-
 template <class Impl>
 void
 LWBackEnd<Impl>::switchOut()
@@ -1489,16 +1529,25 @@ LWBackEnd<Impl>::doSwitchOut()
     // yet written back.
     assert(robEmpty());
     assert(!LSQ.hasStoresToWB());
-
+    writeback.clear();
+    for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+        numInstsToWB.advance();
+
+//    squash(0);
+    assert(waitingList.empty());
+    assert(instList.empty());
+    assert(replayList.empty());
+    assert(writeback.empty());
     LSQ.switchOut();
-
-    squash(0);
 }
 
 template <class Impl>
 void
 LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
 {
+    assert(!squashPending);
+    squashSeqNum = 0;
+    squashNextPC = 0;
     xcSquash = false;
     trapSquash = false;
 
@@ -1641,6 +1690,45 @@ LWBackEnd<Impl>::dumpInsts()
         ++num;
     }
 
+    inst_list_it = --(writeback.end());
+
+    cprintf("Writeback list size: %i\n", writeback.size());
+
+    while (inst_list_it != writeback.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Loads that have not been marked as executed still count
+                // towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it--;
+        ++num;
+    }
+
     cprintf("Waiting list size: %i\n", waitingList.size());
 
     inst_list_it = --(waitingList.end());
index c0bf0b0feaa17fb8e22c93334d415ab7127eb1d8..07fd1aec5840d79de0c79512967256ef79fc1263 100644 (file)
@@ -110,6 +110,8 @@ class OzoneLWLSQ {
     /** Returns the name of the LSQ unit. */
     std::string name() const;
 
+    void regStats();
+
     /** Sets the CPU pointer. */
     void setCPU(FullCPU *cpu_ptr)
     { cpu = cpu_ptr; }
@@ -203,7 +205,7 @@ class OzoneLWLSQ {
     int numLoads() { return loads; }
 
     /** Returns the number of stores in the SQ. */
-    int numStores() { return stores; }
+    int numStores() { return stores + storesInFlight; }
 
     /** Returns if either the LQ or SQ is full. */
     bool isFull() { return lqFull() || sqFull(); }
@@ -212,7 +214,7 @@ class OzoneLWLSQ {
     bool lqFull() { return loads >= (LQEntries - 1); }
 
     /** Returns if the SQ is full. */
-    bool sqFull() { return stores >= (SQEntries - 1); }
+    bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
 
     /** Debugging function to dump instructions in the LSQ. */
     void dumpInsts();
@@ -241,7 +243,9 @@ class OzoneLWLSQ {
 
   private:
     /** Completes the store at the specified index. */
-    void completeStore(int store_idx);
+    void completeStore(DynInstPtr &inst);
+
+    void removeStore(int store_idx);
 
   private:
     /** Pointer to the CPU. */
@@ -342,6 +346,10 @@ class OzoneLWLSQ {
 
     int storesToWB;
 
+  public:
+    int storesInFlight;
+
+  private:
     /// @todo Consider moving to a more advanced model with write vs read ports
     /** The number of cache ports available each cycle. */
     int cachePorts;
@@ -351,6 +359,9 @@ class OzoneLWLSQ {
 
     //list<InstSeqNum> mshrSeqNums;
 
+    /** Total number of memory ordering violations. */
+    Stats::Scalar<> lsqMemOrderViolation;
+
      //Stats::Scalar<> dcacheStallCycles;
     Counter lastDcacheStall;
 
index f72bbb1cc2699f0b4011a2914b6d169bfe5f9bfe..c60884fc3c6b0c83f3c9bef3d9d573964e41ed4c 100644 (file)
@@ -57,6 +57,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
 
 //    lsqPtr->cpu->wakeCPU();
     if (lsqPtr->isSwitchedOut()) {
+        panic("Should not be switched out!");
         if (wbEvent)
             delete wbEvent;
 
@@ -68,7 +69,11 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
         delete wbEvent;
     }
 
-    lsqPtr->completeStore(inst->sqIdx);
+    lsqPtr->completeStore(inst);
+    lsqPtr->removeStore(inst->sqIdx);
+    --(lsqPtr->storesInFlight);
+
+    DPRINTF(OzoneLSQ, "StoresInFlight: %i\n", lsqPtr->storesInFlight);
     if (miss)
         be->removeDcacheMiss(inst);
 }
@@ -82,7 +87,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
 
 template <class Impl>
 OzoneLWLSQ<Impl>::OzoneLWLSQ()
-    : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+    : loads(0), stores(0), storesToWB(0), storesInFlight(0), stalled(false), isLoadBlocked(false),
       loadBlockedHandled(false)
 {
 }
@@ -121,6 +126,15 @@ OzoneLWLSQ<Impl>::name() const
     return "lsqunit";
 }
 
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::regStats()
+{
+    lsqMemOrderViolation
+        .name(name() + ".memOrderViolation")
+        .desc("Number of memory ordering violations");
+}
+
 template<class Impl>
 void
 OzoneLWLSQ<Impl>::clearLQ()
@@ -257,7 +271,7 @@ unsigned
 OzoneLWLSQ<Impl>::numFreeEntries()
 {
     unsigned free_lq_entries = LQEntries - loads;
-    unsigned free_sq_entries = SQEntries - stores;
+    unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
 
     // Both the LQ and SQ entries have an extra dummy entry to differentiate
     // empty/full conditions.  Subtract 1 from the free entries.
@@ -397,6 +411,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
                 // A load incorrectly passed this store.  Squash and refetch.
                 // For now return a fault to show that it was unsuccessful.
                 memDepViolator = (*lq_it);
+                ++lsqMemOrderViolation;
 
                 return TheISA::genMachineCheckFault();
             }
@@ -483,8 +498,8 @@ OzoneLWLSQ<Impl>::writebackStores()
 
         if ((*sq_it).size == 0 && !(*sq_it).completed) {
             sq_it--;
-            completeStore(inst->sqIdx);
-
+            removeStore(inst->sqIdx);
+            completeStore(inst);
             continue;
         }
 
@@ -540,6 +555,8 @@ OzoneLWLSQ<Impl>::writebackStores()
                 inst->sqIdx,inst->readPC(),
                 req->paddr, *(req->data),
                 inst->seqNum);
+        DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
+                storesInFlight + 1);
 
         if (dcacheInterface) {
             assert(!req->completionEvent);
@@ -601,6 +618,8 @@ OzoneLWLSQ<Impl>::writebackStores()
                 }
                 sq_it--;
             }
+            ++storesInFlight;
+//            removeStore(inst->sqIdx);
         } else {
             panic("Must HAVE DCACHE!!!!!\n");
         }
@@ -617,7 +636,7 @@ void
 OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
 {
     DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
-            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
 
 
     LQIt lq_it = loadQueue.begin();
@@ -732,7 +751,7 @@ OzoneLWLSQ<Impl>::dumpInsts()
 
 template <class Impl>
 void
-OzoneLWLSQ<Impl>::completeStore(int store_idx)
+OzoneLWLSQ<Impl>::removeStore(int store_idx)
 {
     SQHashIt sq_hash_it = SQItHash.find(store_idx);
     assert(sq_hash_it != SQItHash.end());
@@ -742,8 +761,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
     (*sq_it).completed = true;
     DynInstPtr inst = (*sq_it).inst;
 
-    --storesToWB;
-
     if (isStalled() &&
         inst->seqNum == stallingStoreIsn) {
         DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
@@ -761,6 +778,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
     SQItHash.erase(sq_hash_it);
     SQIndices.push(inst->sqIdx);
     storeQueue.erase(sq_it);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
+{
+    --storesToWB;
     --stores;
 
     inst->setCompleted();
@@ -839,9 +863,14 @@ OzoneLWLSQ<Impl>::switchOut()
     }
 
     // Clear the queue to free up resources
+    assert(stores == 0);
+    assert(storeQueue.empty());
+    assert(loads == 0);
+    assert(loadQueue.empty());
+    assert(storesInFlight == 0);
     storeQueue.clear();
     loadQueue.clear();
-    loads = stores = storesToWB = 0;
+    loads = stores = storesToWB = storesInFlight = 0;
 }
 
 template <class Impl>
index 7b5c6f67bf444865f8f5210a21fffeb71a607b0a..d28d040f808ac2f0d0abbb8195a9914d7770a02f 100644 (file)
@@ -70,10 +70,11 @@ class SimpleParams : public BaseCPU::Params
 
     unsigned cachePorts;
     unsigned width;
+    unsigned frontEndLatency;
     unsigned frontEndWidth;
+    unsigned backEndLatency;
     unsigned backEndWidth;
     unsigned backEndSquashLatency;
-    unsigned backEndLatency;
     unsigned maxInstBufferSize;
     unsigned numPhysicalRegs;
     unsigned maxOutstandingMemOps;
@@ -149,6 +150,7 @@ class SimpleParams : public BaseCPU::Params
     //
     unsigned LQEntries;
     unsigned SQEntries;
+    bool lsqLimits;
 
     //
     // Memory dependence