Ozone updates.

author Kevin Lim <ktlim@umich.edu>

Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)

committer Kevin Lim <ktlim@umich.edu>

Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
author Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
committer Kevin Lim <ktlim@umich.edu>
Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
diff --git a/cpu/ozone/front_end.hh b/cpu/ozone/front_end.hh

index dd382491fd5e8508fec587cdb066219b038c7601..b677e667c8fa185f6695a1f571667127c53ad67c 100644 (file)
--- a/cpu/ozone/front_end.hh
+++ b/cpu/ozone/front_end.hh
@@ -31,6 +31,7 @@
  
  #include <deque>
  
+#include "base/timebuf.hh"
  #include "cpu/inst_seq.hh"
  #include "cpu/o3/bpred_unit.hh"
  #include "cpu/ozone/rename_table.hh"
@@ -210,15 +211,21 @@ class FrontEnd
      void dumpInsts();
  
    private:
+    TimeBuffer<int> numInstsReady;
+
      typedef typename std::deque<DynInstPtr> InstBuff;
      typedef typename InstBuff::iterator InstBuffIt;
  
+    InstBuff feBuffer;
+
      InstBuff instBuffer;
  
      int instBufferSize;
  
      int maxInstBufferSize;
  
+    int latency;
+
      int width;
  
      int freeRegs;
diff --git a/cpu/ozone/front_end_impl.hh b/cpu/ozone/front_end_impl.hh

index ca9948b7dc598e39e167862bd526416666e26c72..09fc2e2f89e07dede523714c35f3ba068bdc8c0d 100644 (file)
--- a/cpu/ozone/front_end_impl.hh
+++ b/cpu/ozone/front_end_impl.hh
@@ -41,8 +41,10 @@ template <class Impl>
  FrontEnd<Impl>::FrontEnd(Params *params)
      : branchPred(params),
        icacheInterface(params->icacheInterface),
+      numInstsReady(params->frontEndLatency, 0),
        instBufferSize(0),
        maxInstBufferSize(params->maxInstBufferSize),
+      latency(params->frontEndLatency),
        width(params->frontEndWidth),
        freeRegs(params->numPhysicalRegs),
        numPhysRegs(params->numPhysicalRegs),
@@ -261,6 +263,18 @@ FrontEnd<Impl>::tick()
      if (switchedOut)
          return;
  
+    for (int insts_to_queue = numInstsReady[-latency];
+         !instBuffer.empty() && insts_to_queue;
+         --insts_to_queue)
+    {
+        DPRINTF(FE, "Transferring instruction [sn:%lli] to the feBuffer\n",
+                instBuffer.front()->seqNum);
+        feBuffer.push_back(instBuffer.front());
+        instBuffer.pop_front();
+    }
+
+    numInstsReady.advance();
+
      // @todo: Maybe I want to just have direct communication...
      if (fromCommit->doneSeqNum) {
          branchPred.update(fromCommit->doneSeqNum, 0);
@@ -349,6 +363,7 @@ FrontEnd<Impl>::tick()
          // latency
          instBuffer.push_back(inst);
          ++instBufferSize;
+        numInstsReady[0]++;
          ++num_inst;
  
  #if FULL_SYSTEM
@@ -570,6 +585,7 @@ FrontEnd<Impl>::handleFault(Fault &fault)
      instruction->fault = fault;
      instruction->setCanIssue();
      instBuffer.push_back(instruction);
+    numInstsReady[0]++;
      ++instBufferSize;
  }
  
@@ -599,6 +615,21 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
          freeRegs+= inst->numDestRegs();
      }
  
+    while (!feBuffer.empty() &&
+           feBuffer.back()->seqNum > squash_num) {
+        DynInstPtr inst = feBuffer.back();
+
+        DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
+                inst->seqNum, inst->readPC());
+
+        inst->clearDependents();
+
+        feBuffer.pop_back();
+        --instBufferSize;
+
+        freeRegs+= inst->numDestRegs();
+    }
+
      // Copy over rename table from the back end.
      renameTable.copyFrom(backEnd->renameTable);
  
@@ -633,13 +664,13 @@ template <class Impl>
  typename Impl::DynInstPtr
  FrontEnd<Impl>::getInst()
  {
-    if (instBufferSize == 0) {
+    if (feBuffer.empty()) {
          return NULL;
      }
  
-    DynInstPtr inst = instBuffer.front();
+    DynInstPtr inst = feBuffer.front();
  
-    instBuffer.pop_front();
+    feBuffer.pop_front();
  
      --instBufferSize;
  
@@ -857,6 +888,7 @@ FrontEnd<Impl>::doSwitchOut()
      squash(0, 0);
      instBuffer.clear();
      instBufferSize = 0;
+    feBuffer.clear();
      status = Idle;
  }
  
diff --git a/cpu/ozone/lw_back_end.hh b/cpu/ozone/lw_back_end.hh

index 19f2b2b6184a2e8dcd1bedb0b39e9f8f46e08cba..4e2f5606c1c9b9e32e396fff0482ee97e72d5d6d 100644 (file)
--- a/cpu/ozone/lw_back_end.hh
+++ b/cpu/ozone/lw_back_end.hh
@@ -78,7 +78,7 @@ class LWBackEnd
      TimeBuffer<IssueToExec> i2e;
      typename TimeBuffer<IssueToExec>::wire instsToExecute;
      TimeBuffer<ExecToCommit> e2c;
-    TimeBuffer<Writeback> numInstsToWB;
+    TimeBuffer<int> numInstsToWB;
  
      TimeBuffer<CommStruct> *comm;
      typename TimeBuffer<CommStruct>::wire toIEW;
@@ -157,7 +157,7 @@ class LWBackEnd
  
      Tick lastCommitCycle;
  
-    bool robEmpty() { return instList.empty(); }
+    bool robEmpty() { return numInsts == 0; }
  
      bool isFull() { return numInsts >= numROBEntries; }
      bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
@@ -212,6 +212,7 @@ class LWBackEnd
      }
  
      void instToCommit(DynInstPtr &inst);
+    void readyInstsForCommit();
  
      void switchOut();
      void doSwitchOut();
@@ -293,12 +294,13 @@ class LWBackEnd
  
      MemReqPtr memReq;
  
+    int latency;
+
      // General back end width. Used if the more specific isn't given.
      int width;
  
      // Dispatch width.
      int dispatchWidth;
-    int numDispatchEntries;
      int dispatchSize;
  
      int waitingInsts;
@@ -323,6 +325,7 @@ class LWBackEnd
  
      int numROBEntries;
      int numInsts;
+    bool lsqLimits;
  
      std::set<InstSeqNum> waitingMemOps;
      typedef std::set<InstSeqNum>::iterator MemIt;
@@ -333,9 +336,6 @@ class LWBackEnd
      InstSeqNum squashSeqNum;
      Addr squashNextPC;
  
-    Fault faultFromFetch;
-    bool fetchHasFault;
-
      bool switchedOut;
      bool switchPending;
  
@@ -359,8 +359,6 @@ class LWBackEnd
      std::list<DynInstPtr> replayList;
      std::list<DynInstPtr> writeback;
  
-    int latency;
-
      int squashLatency;
  
      bool exactFullStall;
@@ -397,9 +395,11 @@ class LWBackEnd
      Stats::Scalar<> lsqInversion;
  
      Stats::Vector<> nIssuedDist;
+/*
      Stats::VectorDistribution<> issueDelayDist;
  
      Stats::VectorDistribution<> queueResDist;
+*/
  /*
      Stats::Vector<> stat_fu_busy;
      Stats::Vector2d<> stat_fuBusy;
@@ -447,7 +447,7 @@ class LWBackEnd
  
      Stats::Vector<>  ROBCount;  // cumulative ROB occupancy
      Stats::Formula ROBOccRate;
-    Stats::VectorDistribution<> ROBOccDist;
+//    Stats::VectorDistribution<> ROBOccDist;
    public:
      void dumpInsts();
  
diff --git a/cpu/ozone/lw_back_end_impl.hh b/cpu/ozone/lw_back_end_impl.hh

index 9e1cd28cf13d2e4c4295342368d85484d2e810de..9a6ad4c14c94ba34df9bfb48ea3e22ea876e6e78 100644 (file)
--- a/cpu/ozone/lw_back_end_impl.hh
+++ b/cpu/ozone/lw_back_end_impl.hh
@@ -151,8 +151,10 @@ LWBackEnd<Impl>::LdWritebackEvent::process()
  
  //    iewStage->wakeCPU();
  
-    if (be->isSwitchedOut())
-        return;
+    assert(inst->isSquashed() || !be->isSwitchedOut());
+
+//    if (be->isSwitchedOut() && inst->isLoad())
+//        return;
  
      if (dcacheMiss) {
          be->removeDcacheMiss(inst);
@@ -208,14 +210,14 @@ LWBackEnd<Impl>::DCacheCompletionEvent::description()
  
  template <class Impl>
  LWBackEnd<Impl>::LWBackEnd(Params *params)
-    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(5, 5),
+    : d2i(5, 5), i2e(5, 5), e2c(5, 5), numInstsToWB(params->backEndLatency, 0),
        trapSquash(false), xcSquash(false), cacheCompletionEvent(this),
-      dcacheInterface(params->dcacheInterface), width(params->backEndWidth),
+      dcacheInterface(params->dcacheInterface), latency(params->backEndLatency),
+      width(params->backEndWidth), lsqLimits(params->lsqLimits),
        exactFullStall(true)
  {
      numROBEntries = params->numROBEntries;
      numInsts = 0;
-    numDispatchEntries = 32;
      maxOutstandingMemOps = params->maxOutstandingMemOps;
      numWaitingMemOps = 0;
      waitingInsts = 0;
@@ -251,6 +253,8 @@ void
  LWBackEnd<Impl>::regStats()
  {
      using namespace Stats;
+    LSQ.regStats();
+
      robCapEvents
          .init(cpu->number_of_threads)
          .name(name() + ".ROB:cap_events")
@@ -377,6 +381,7 @@ LWBackEnd<Impl>::regStats()
          .desc("Number of insts issued each cycle")
          .flags(total | pdf | dist)
          ;
+/*
      issueDelayDist
          .init(Num_OpClasses,0,99,2)
          .name(name() + ".ISSUE:")
@@ -393,7 +398,7 @@ LWBackEnd<Impl>::regStats()
      for (int i = 0; i < Num_OpClasses; ++i) {
          queueResDist.subname(i, opClassStrings[i]);
      }
-
+*/
      writebackCount
          .init(cpu->number_of_threads)
          .name(name() + ".WB:count")
@@ -555,13 +560,14 @@ LWBackEnd<Impl>::regStats()
          .flags(total)
          ;
      ROBOccRate = ROBCount / cpu->numCycles;
-
+/*
      ROBOccDist
          .init(cpu->number_of_threads,0,numROBEntries,2)
          .name(name() + ".ROB:occ_dist")
          .desc("ROB Occupancy per cycle")
          .flags(total | cdf)
          ;
+*/
  }
  
  template <class Impl>
@@ -654,18 +660,22 @@ LWBackEnd<Impl>::tick()
  {
      DPRINTF(BE, "Ticking back end\n");
  
+    // Read in any done instruction information and update the IQ or LSQ.
+    updateStructures();
+
      if (switchPending && robEmpty() && !LSQ.hasStoresToWB()) {
          cpu->signalSwitched();
          return;
      }
  
+    readyInstsForCommit();
+
+    numInstsToWB.advance();
+
      ROBCount[0]+= numInsts;
  
      wbCycle = 0;
  
-    // Read in any done instruction information and update the IQ or LSQ.
-    updateStructures();
-
  #if FULL_SYSTEM
      checkInterrupts();
  
@@ -740,6 +750,10 @@ LWBackEnd<Impl>::dispatchInsts()
      while (numInsts < numROBEntries &&
             numWaitingMemOps < maxOutstandingMemOps) {
          // Get instruction from front of time buffer
+        if (lsqLimits && LSQ.isFull()) {
+            break;
+        }
+
          DynInstPtr inst = frontEnd->getInst();
          if (!inst) {
              break;
@@ -798,6 +812,7 @@ LWBackEnd<Impl>::dispatchInsts()
                  inst->setIssued();
                  inst->setExecuted();
                  inst->setCanCommit();
+                numInstsToWB[0]++;
              } else {
                  DPRINTF(BE, "Instruction [sn:%lli] ready, addding to "
                          "exeList.\n",
@@ -987,16 +1002,10 @@ template<class Impl>
  void
  LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
  {
-
      DPRINTF(BE, "Sending instructions to commit [sn:%lli] PC %#x.\n",
              inst->seqNum, inst->readPC());
  
      if (!inst->isSquashed()) {
-        DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
-                inst->seqNum, inst->readPC());
-
-        inst->setCanCommit();
-
          if (inst->isExecuted()) {
              inst->setResultReady();
              int dependents = wakeDependents(inst);
@@ -1007,8 +1016,32 @@ LWBackEnd<Impl>::instToCommit(DynInstPtr &inst)
          }
      }
  
+    writeback.push_back(inst);
+
+    numInstsToWB[0]++;
+
      writebackCount[0]++;
  }
+
+template <class Impl>
+void
+LWBackEnd<Impl>::readyInstsForCommit()
+{
+    for (int i = numInstsToWB[-latency];
+         !writeback.empty() && i;
+         --i)
+    {
+        DynInstPtr inst = writeback.front();
+        writeback.pop_front();
+        if (!inst->isSquashed()) {
+            DPRINTF(BE, "Writing back instruction [sn:%lli] PC %#x.\n",
+                    inst->seqNum, inst->readPC());
+
+            inst->setCanCommit();
+        }
+    }
+}
+
  #if 0
  template <class Impl>
  void
@@ -1221,6 +1254,20 @@ LWBackEnd<Impl>::commitInst(int inst_num)
          ++freed_regs;
      }
  
+#if FULL_SYSTEM
+    if (thread->profile) {
+//        bool usermode =
+//            (xc->readMiscReg(AlphaISA::IPR_DTB_CM) & 0x18) != 0;
+//        thread->profilePC = usermode ? 1 : inst->readPC();
+        thread->profilePC = inst->readPC();
+        ProfileNode *node = thread->profile->consume(thread->getXCProxy(),
+                                                     inst->staticInst);
+
+        if (node)
+            thread->profileNode = node;
+    }
+#endif
+
      if (inst->traceData) {
          inst->traceData->setFetchSeq(inst->seqNum);
          inst->traceData->setCPSeq(thread->numInst);
@@ -1280,9 +1327,9 @@ LWBackEnd<Impl>::commitInsts()
      while (!instList.empty() && inst_num < commitWidth) {
          if (instList.back()->isSquashed()) {
              instList.back()->clearDependents();
+            ROBSquashedInsts[instList.back()->threadNumber]++;
              instList.pop_back();
              --numInsts;
-            ROBSquashedInsts[instList.back()->threadNumber]++;
              continue;
          }
  
@@ -1304,10 +1351,10 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
      LSQ.squash(sn);
  
      int freed_regs = 0;
-    InstListIt waiting_list_end = waitingList.end();
+    InstListIt insts_end_it = waitingList.end();
      InstListIt insts_it = waitingList.begin();
  
-    while (insts_it != waiting_list_end && (*insts_it)->seqNum > sn)
+    while (insts_it != insts_end_it && (*insts_it)->seqNum > sn)
      {
          if ((*insts_it)->isSquashed()) {
              ++insts_it;
@@ -1333,6 +1380,7 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
      while (!instList.empty() && (*insts_it)->seqNum > sn)
      {
          if ((*insts_it)->isSquashed()) {
+            panic("Instruction should not be already squashed and on list!");
              ++insts_it;
              continue;
          }
@@ -1364,18 +1412,6 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
          --numInsts;
      }
  
-    insts_it = waitingList.begin();
-    while (!waitingList.empty() && insts_it != waitingList.end()) {
-        if ((*insts_it)->seqNum < sn) {
-            ++insts_it;
-            continue;
-        }
-        assert((*insts_it)->isSquashed());
-
-        waitingList.erase(insts_it++);
-        waitingInsts--;
-    }
-
      while (memBarrier && memBarrier->seqNum > sn) {
          DPRINTF(BE, "[sn:%lli] Memory barrier squashed (or previously "
                  "squashed)\n", memBarrier->seqNum);
@@ -1393,6 +1429,18 @@ LWBackEnd<Impl>::squash(const InstSeqNum &sn)
          }
      }
  
+    insts_it = replayList.begin();
+    insts_end_it = replayList.end();
+    while (!replayList.empty() && insts_it != insts_end_it) {
+        if ((*insts_it)->seqNum < sn) {
+            ++insts_it;
+            continue;
+        }
+        assert((*insts_it)->isSquashed());
+
+        replayList.erase(insts_it++);
+    }
+
      frontEnd->addFreeRegs(freed_regs);
  }
  
@@ -1463,14 +1511,6 @@ LWBackEnd<Impl>::squashDueToMemBlocked(DynInstPtr &inst)
      frontEnd->squash(inst->seqNum - 1, inst->readPC());
  }
  
-template <class Impl>
-void
-LWBackEnd<Impl>::fetchFault(Fault &fault)
-{
-    faultFromFetch = fault;
-    fetchHasFault = true;
-}
-
  template <class Impl>
  void
  LWBackEnd<Impl>::switchOut()
@@ -1489,16 +1529,25 @@ LWBackEnd<Impl>::doSwitchOut()
      // yet written back.
      assert(robEmpty());
      assert(!LSQ.hasStoresToWB());
-
+    writeback.clear();
+    for (int i = 0; i < numInstsToWB.getSize() + 1; ++i)
+        numInstsToWB.advance();
+
+//    squash(0);
+    assert(waitingList.empty());
+    assert(instList.empty());
+    assert(replayList.empty());
+    assert(writeback.empty());
      LSQ.switchOut();
-
-    squash(0);
  }
  
  template <class Impl>
  void
  LWBackEnd<Impl>::takeOverFrom(ExecContext *old_xc)
  {
+    assert(!squashPending);
+    squashSeqNum = 0;
+    squashNextPC = 0;
      xcSquash = false;
      trapSquash = false;
  
@@ -1641,6 +1690,45 @@ LWBackEnd<Impl>::dumpInsts()
          ++num;
      }
  
+    inst_list_it = --(writeback.end());
+
+    cprintf("Writeback list size: %i\n", writeback.size());
+
+    while (inst_list_it != writeback.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Loads that have not been marked as executed still count
+                // towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it--;
+        ++num;
+    }
+
      cprintf("Waiting list size: %i\n", waitingList.size());
  
      inst_list_it = --(waitingList.end());
diff --git a/cpu/ozone/lw_lsq.hh b/cpu/ozone/lw_lsq.hh

index c0bf0b0feaa17fb8e22c93334d415ab7127eb1d8..07fd1aec5840d79de0c79512967256ef79fc1263 100644 (file)
--- a/cpu/ozone/lw_lsq.hh
+++ b/cpu/ozone/lw_lsq.hh
@@ -110,6 +110,8 @@ class OzoneLWLSQ {
      /** Returns the name of the LSQ unit. */
      std::string name() const;
  
+    void regStats();
+
      /** Sets the CPU pointer. */
      void setCPU(FullCPU *cpu_ptr)
      { cpu = cpu_ptr; }
@@ -203,7 +205,7 @@ class OzoneLWLSQ {
      int numLoads() { return loads; }
  
      /** Returns the number of stores in the SQ. */
-    int numStores() { return stores; }
+    int numStores() { return stores + storesInFlight; }
  
      /** Returns if either the LQ or SQ is full. */
      bool isFull() { return lqFull() || sqFull(); }
@@ -212,7 +214,7 @@ class OzoneLWLSQ {
      bool lqFull() { return loads >= (LQEntries - 1); }
  
      /** Returns if the SQ is full. */
-    bool sqFull() { return stores >= (SQEntries - 1); }
+    bool sqFull() { return (stores + storesInFlight) >= (SQEntries - 1); }
  
      /** Debugging function to dump instructions in the LSQ. */
      void dumpInsts();
@@ -241,7 +243,9 @@ class OzoneLWLSQ {
  
    private:
      /** Completes the store at the specified index. */
-    void completeStore(int store_idx);
+    void completeStore(DynInstPtr &inst);
+
+    void removeStore(int store_idx);
  
    private:
      /** Pointer to the CPU. */
@@ -342,6 +346,10 @@ class OzoneLWLSQ {
  
      int storesToWB;
  
+  public:
+    int storesInFlight;
+
+  private:
      /// @todo Consider moving to a more advanced model with write vs read ports
      /** The number of cache ports available each cycle. */
      int cachePorts;
@@ -351,6 +359,9 @@ class OzoneLWLSQ {
  
      //list<InstSeqNum> mshrSeqNums;
  
+    /** Total number of memory ordering violations. */
+    Stats::Scalar<> lsqMemOrderViolation;
+
       //Stats::Scalar<> dcacheStallCycles;
      Counter lastDcacheStall;
  
diff --git a/cpu/ozone/lw_lsq_impl.hh b/cpu/ozone/lw_lsq_impl.hh

index f72bbb1cc2699f0b4011a2914b6d169bfe5f9bfe..c60884fc3c6b0c83f3c9bef3d9d573964e41ed4c 100644 (file)
--- a/cpu/ozone/lw_lsq_impl.hh
+++ b/cpu/ozone/lw_lsq_impl.hh
@@ -57,6 +57,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
  
  //    lsqPtr->cpu->wakeCPU();
      if (lsqPtr->isSwitchedOut()) {
+        panic("Should not be switched out!");
          if (wbEvent)
              delete wbEvent;
  
@@ -68,7 +69,11 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
          delete wbEvent;
      }
  
-    lsqPtr->completeStore(inst->sqIdx);
+    lsqPtr->completeStore(inst);
+    lsqPtr->removeStore(inst->sqIdx);
+    --(lsqPtr->storesInFlight);
+
+    DPRINTF(OzoneLSQ, "StoresInFlight: %i\n", lsqPtr->storesInFlight);
      if (miss)
          be->removeDcacheMiss(inst);
  }
@@ -82,7 +87,7 @@ OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
  
  template <class Impl>
  OzoneLWLSQ<Impl>::OzoneLWLSQ()
-    : loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
+    : loads(0), stores(0), storesToWB(0), storesInFlight(0), stalled(false), isLoadBlocked(false),
        loadBlockedHandled(false)
  {
  }
@@ -121,6 +126,15 @@ OzoneLWLSQ<Impl>::name() const
      return "lsqunit";
  }
  
+template<class Impl>
+void
+OzoneLWLSQ<Impl>::regStats()
+{
+    lsqMemOrderViolation
+        .name(name() + ".memOrderViolation")
+        .desc("Number of memory ordering violations");
+}
+
  template<class Impl>
  void
  OzoneLWLSQ<Impl>::clearLQ()
@@ -257,7 +271,7 @@ unsigned
  OzoneLWLSQ<Impl>::numFreeEntries()
  {
      unsigned free_lq_entries = LQEntries - loads;
-    unsigned free_sq_entries = SQEntries - stores;
+    unsigned free_sq_entries = SQEntries - (stores + storesInFlight);
  
      // Both the LQ and SQ entries have an extra dummy entry to differentiate
      // empty/full conditions.  Subtract 1 from the free entries.
@@ -397,6 +411,7 @@ OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
                  // A load incorrectly passed this store.  Squash and refetch.
                  // For now return a fault to show that it was unsuccessful.
                  memDepViolator = (*lq_it);
+                ++lsqMemOrderViolation;
  
                  return TheISA::genMachineCheckFault();
              }
@@ -483,8 +498,8 @@ OzoneLWLSQ<Impl>::writebackStores()
  
          if ((*sq_it).size == 0 && !(*sq_it).completed) {
              sq_it--;
-            completeStore(inst->sqIdx);
-
+            removeStore(inst->sqIdx);
+            completeStore(inst);
              continue;
          }
  
@@ -540,6 +555,8 @@ OzoneLWLSQ<Impl>::writebackStores()
                  inst->sqIdx,inst->readPC(),
                  req->paddr, *(req->data),
                  inst->seqNum);
+        DPRINTF(OzoneLSQ, "StoresInFlight: %i\n",
+                storesInFlight + 1);
  
          if (dcacheInterface) {
              assert(!req->completionEvent);
@@ -601,6 +618,8 @@ OzoneLWLSQ<Impl>::writebackStores()
                  }
                  sq_it--;
              }
+            ++storesInFlight;
+//            removeStore(inst->sqIdx);
          } else {
              panic("Must HAVE DCACHE!!!!!\n");
          }
@@ -617,7 +636,7 @@ void
  OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
  {
      DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
-            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores+storesInFlight);
  
  
      LQIt lq_it = loadQueue.begin();
@@ -732,7 +751,7 @@ OzoneLWLSQ<Impl>::dumpInsts()
  
  template <class Impl>
  void
-OzoneLWLSQ<Impl>::completeStore(int store_idx)
+OzoneLWLSQ<Impl>::removeStore(int store_idx)
  {
      SQHashIt sq_hash_it = SQItHash.find(store_idx);
      assert(sq_hash_it != SQItHash.end());
@@ -742,8 +761,6 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
      (*sq_it).completed = true;
      DynInstPtr inst = (*sq_it).inst;
  
-    --storesToWB;
-
      if (isStalled() &&
          inst->seqNum == stallingStoreIsn) {
          DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
@@ -761,6 +778,13 @@ OzoneLWLSQ<Impl>::completeStore(int store_idx)
      SQItHash.erase(sq_hash_it);
      SQIndices.push(inst->sqIdx);
      storeQueue.erase(sq_it);
+}
+
+template <class Impl>
+void
+OzoneLWLSQ<Impl>::completeStore(DynInstPtr &inst)
+{
+    --storesToWB;
      --stores;
  
      inst->setCompleted();
@@ -839,9 +863,14 @@ OzoneLWLSQ<Impl>::switchOut()
      }
  
      // Clear the queue to free up resources
+    assert(stores == 0);
+    assert(storeQueue.empty());
+    assert(loads == 0);
+    assert(loadQueue.empty());
+    assert(storesInFlight == 0);
      storeQueue.clear();
      loadQueue.clear();
-    loads = stores = storesToWB = 0;
+    loads = stores = storesToWB = storesInFlight = 0;
  }
  
  template <class Impl>
diff --git a/cpu/ozone/simple_params.hh b/cpu/ozone/simple_params.hh

index 7b5c6f67bf444865f8f5210a21fffeb71a607b0a..d28d040f808ac2f0d0abbb8195a9914d7770a02f 100644 (file)
--- a/cpu/ozone/simple_params.hh
+++ b/cpu/ozone/simple_params.hh
@@ -70,10 +70,11 @@ class SimpleParams : public BaseCPU::Params
  
      unsigned cachePorts;
      unsigned width;
+    unsigned frontEndLatency;
      unsigned frontEndWidth;
+    unsigned backEndLatency;
      unsigned backEndWidth;
      unsigned backEndSquashLatency;
-    unsigned backEndLatency;
      unsigned maxInstBufferSize;
      unsigned numPhysicalRegs;
      unsigned maxOutstandingMemOps;
@@ -149,6 +150,7 @@ class SimpleParams : public BaseCPU::Params
      //
      unsigned LQEntries;
      unsigned SQEntries;
+    bool lsqLimits;
  
      //
      // Memory dependence
author	Kevin Lim <ktlim@umich.edu>
	Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
committer	Kevin Lim <ktlim@umich.edu>
	Thu, 24 Aug 2006 21:45:04 +0000 (17:45 -0400)
cpu/ozone/front_end.hh		patch \| blob \| history
cpu/ozone/front_end_impl.hh		patch \| blob \| history
cpu/ozone/lw_back_end.hh		patch \| blob \| history
cpu/ozone/lw_back_end_impl.hh		patch \| blob \| history
cpu/ozone/lw_lsq.hh		patch \| blob \| history
cpu/ozone/lw_lsq_impl.hh		patch \| blob \| history
cpu/ozone/simple_params.hh		patch \| blob \| history