O3 code update/cleanup.

author Kevin Lim <ktlim@umich.edu>

Fri, 19 May 2006 19:53:17 +0000 (15:53 -0400)

committer Kevin Lim <ktlim@umich.edu>

Fri, 19 May 2006 19:53:17 +0000 (15:53 -0400)
author Kevin Lim <ktlim@umich.edu>
Fri, 19 May 2006 19:53:17 +0000 (15:53 -0400)
committer Kevin Lim <ktlim@umich.edu>
Fri, 19 May 2006 19:53:17 +0000 (15:53 -0400)
diff --git a/cpu/o3/2bit_local_pred.cc b/cpu/o3/2bit_local_pred.cc

index eab98531dac974ba3dfb31da53b3a750cdc7c44d..c3fb2fdb8bd77548366f720249d54e5513ccf369 100644 (file)
--- a/cpu/o3/2bit_local_pred.cc
+++ b/cpu/o3/2bit_local_pred.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
diff --git a/cpu/o3/2bit_local_pred.hh b/cpu/o3/2bit_local_pred.hh

index 0dfe53819b8d09662a5e86b0d9f2832337b16168..cd65978ca3488a3cfbbc668b2f364db6ef08cdf4 100644 (file)
--- a/cpu/o3/2bit_local_pred.hh
+++ b/cpu/o3/2bit_local_pred.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh

index f70793aaa7efd8160db14c4684ad5a4ab60fe12a..78ad5f7d85b27e59e3992590463ccdfe904343e4 100644 (file)
--- a/cpu/o3/alpha_cpu.hh
+++ b/cpu/o3/alpha_cpu.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -87,7 +87,8 @@ class AlphaFullCPU : public FullO3CPU<Impl>
  
          virtual Status status() const { return thread->status(); }
  
-        virtual void setStatus(Status new_status) { thread->setStatus(new_status); }
+        virtual void setStatus(Status new_status)
+        { thread->setStatus(new_status); }
  
          /// Set the status to Active.  Optional delay indicates number of
          /// cycles to wait before beginning execution.
@@ -168,12 +169,15 @@ class AlphaFullCPU : public FullO3CPU<Impl>
          virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
  
          // @todo: Figure out where these store cond failures should go.
-        virtual unsigned readStCondFailures() { return thread->storeCondFailures; }
+        virtual unsigned readStCondFailures()
+        { return thread->storeCondFailures; }
  
-        virtual void setStCondFailures(unsigned sc_failures) { thread->storeCondFailures = sc_failures; }
+        virtual void setStCondFailures(unsigned sc_failures)
+        { thread->storeCondFailures = sc_failures; }
  
  #if FULL_SYSTEM
-        virtual bool inPalMode() { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
+        virtual bool inPalMode()
+        { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
  #endif
  
          // Only really makes sense for old CPU model.  Lots of code
@@ -194,10 +198,6 @@ class AlphaFullCPU : public FullO3CPU<Impl>
  #endif
      };
  
-//    friend class AlphaXC;
-
-//    std::vector<ExecContext *> xcProxies;
-
  #if FULL_SYSTEM
      /** ITB pointer. */
      AlphaITB *itb;
diff --git a/cpu/o3/bpred_unit.cc b/cpu/o3/bpred_unit.cc

index a78dcf463c0f9326eab4590e710850a015361b0a..92344111fede8cb374cb54912fcb3ea4f334d954 100644 (file)
--- a/cpu/o3/bpred_unit.cc
+++ b/cpu/o3/bpred_unit.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
diff --git a/cpu/o3/bpred_unit.hh b/cpu/o3/bpred_unit.hh

index ee7ffc1837f6b513c4d54bb4eaf40924f2d714d3..b7814b2e904f3dd8b56b7f2838a6ab9205e878fb 100644 (file)
--- a/cpu/o3/bpred_unit.hh
+++ b/cpu/o3/bpred_unit.hh
@@ -43,12 +43,7 @@
  
  /**
   * Basically a wrapper class to hold both the branch predictor
- * and the BTB.  Right now I'm unsure of the implementation; it would
- * be nicer to have something closer to the CPUPolicy or the Impl where
- * this is just typedefs, but it forces the upper level stages to be
- * aware of the constructors of the BP and the BTB.  The nicer thing
- * to do is have this templated on the Impl, accept the usual Params
- * object, and be able to call the constructors on the BP and BTB.
+ * and the BTB.
   */
  template<class Impl>
  class TwobitBPredUnit
diff --git a/cpu/o3/bpred_unit_impl.hh b/cpu/o3/bpred_unit_impl.hh

index d20b31e555b1fabbe9c5a5839220842062db202a..c37df606bdfa8a19186b011643876f76e33f4126 100644 (file)
--- a/cpu/o3/bpred_unit_impl.hh
+++ b/cpu/o3/bpred_unit_impl.hh
@@ -26,13 +26,13 @@
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
+#include <list>
+#include <vector>
+
  #include "base/trace.hh"
  #include "base/traceflags.hh"
  #include "cpu/o3/bpred_unit.hh"
  
-#include <vector>
-#include <list>
-
  using namespace std;
  
  template<class Impl>
diff --git a/cpu/o3/comm.hh b/cpu/o3/comm.hh

index 1a8f394ca110838cd3c452f821c54c8c08bb8fbf..c36c58d3d04bef8e17d661f8f2475d59dd979cb3 100644 (file)
--- a/cpu/o3/comm.hh
+++ b/cpu/o3/comm.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -169,10 +169,6 @@ struct TimeBufStruct {
          bool commitInsts;
          InstSeqNum squashSeqNum;
  
-        // Extra bit of information so that the LDSTQ only updates when it
-        // needs to.
-        bool commitIsLoad;
-
          // Communication specifically to the IQ to tell the IQ that it can
          // schedule a non-speculative instruction.
          InstSeqNum nonSpecSeqNum;
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh

index 73eccd2b067e28033dda88cdfdacc7a5e233fe34..66abf8dc6b3e3c3ddf482098283c53d8cb2df0c6 100644 (file)
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -30,10 +30,10 @@
  #define __CPU_O3_COMMIT_HH__
  
  #include "arch/faults.hh"
-#include "cpu/inst_seq.hh"
  #include "base/statistics.hh"
  #include "base/timebuf.hh"
  #include "cpu/exetrace.hh"
+#include "cpu/inst_seq.hh"
  #include "mem/memory_interface.hh"
  
  template <class>
@@ -59,8 +59,7 @@ class O3ThreadState;
   * squashing instruction's sequence number, and only broadcasting a
   * redirect if it corresponds to an older instruction. Commit also
   * supports multiple cycle squashing, to model a ROB that can only
- * remove a certain number of instructions per cycle. Eventually traps
- * and interrupts will most likely be handled here as well.
+ * remove a certain number of instructions per cycle.
   */
  template<class Impl>
  class DefaultCommit
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh

index 170f5b01f0ee2a7e943cba9c2385b18116c4a804..346a8bc1c3bac146a1020a7409f7bd227ea10df8 100644 (file)
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -27,12 +27,7 @@
   */
  
  #include <algorithm>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <iomanip>
-#include <stdio.h>
-#include <string.h>
+#include <string>
  
  #include "base/loader/symtab.hh"
  #include "base/timebuf.hh"
@@ -835,58 +830,6 @@ DefaultCommit<Impl>::commitInsts()
      unsigned num_committed = 0;
  
      DynInstPtr head_inst;
-#if FULL_SYSTEM
-    // Not the best way to check if the front end is empty, but it should
-    // work.
-    // @todo: Try to avoid directly accessing fetch.
-    if (commitStatus[0] == FetchTrapPending && rob->isEmpty()) {
-        DPRINTF(Commit, "Fault from fetch is pending.\n");
-
-        fetchTrapWait++;
-        if (fetchTrapWait > 10000000) {
-            panic("Fetch trap has been pending for a long time!");
-        }
-        if (fetchFaultTick > curTick) {
-            DPRINTF(Commit, "Not enough cycles since fault, fault will "
-                    "happen on %lli\n",
-                    fetchFaultTick);
-            cpu->activityThisCycle();
-            return;
-        } else if (iewStage->hasStoresToWB()) {
-            DPRINTF(Commit, "IEW still has stores to WB.  Waiting until "
-                    "they are completed. fetchTrapWait:%i\n",
-                    fetchTrapWait);
-            cpu->activityThisCycle();
-            return;
-        } else if (cpu->inPalMode(readPC())) {
-            DPRINTF(Commit, "In pal mode right now. fetchTrapWait:%i\n",
-                    fetchTrapWait);
-            return;
-        } else if (fetchStage->getYoungestSN() > youngestSeqNum[0]) {
-            DPRINTF(Commit, "Waiting for front end to drain. fetchTrapWait:%i\n",
-                    fetchTrapWait);
-            return;
-        }
-        fetchTrapWait = 0;
-        DPRINTF(Commit, "ROB is empty, handling fetch trap.\n");
-
-        assert(!thread[0]->inSyscall);
-
-        thread[0]->inSyscall = true;
-
-        // Consider holding onto the trap and waiting until the trap event
-        // happens for this to be executed.
-        cpu->trap(fetchFault, 0);
-
-        // Exit state update mode to avoid accidental updating.
-        thread[0]->inSyscall = false;
-
-        commitStatus[0] = TrapPending;
-        // Set it up so that we squash next cycle
-        trapSquash[0] = true;
-        return;
-    }
-#endif
  
      // Commit as many instructions as possible until the commit bandwidth
      // limit is reached, or it becomes impossible to commit any more.
diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh

index 3f3f6824796cd2ae6d9d4454f35f0e297d0af82c..3035b3387b52ba6b6413fab0b61952641f40c014 100644 (file)
--- a/cpu/o3/decode.hh
+++ b/cpu/o3/decode.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -35,11 +35,11 @@
  #include "base/timebuf.hh"
  
  /**
- * DefaultDecode class handles both single threaded and SMT decode. Its width is
- * specified by the parameters; each cycles it tries to decode that many
- * instructions. Because instructions are actually decoded when the StaticInst
- * is created, this stage does not do much other than check any PC-relative
- * branches.
+ * DefaultDecode class handles both single threaded and SMT
+ * decode. Its width is specified by the parameters; each cycle it
+ * tries to decode that many instructions. Because instructions are
+ * actually decoded when the StaticInst is created, this stage does
+ * not do much other than check any PC-relative branches.
   */
  template<class Impl>
  class DefaultDecode
diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh

index a419a89322cd4ef0bb08c02de3d21def6c423b2c..2ed7ec6fc16ff197d0c33300b65f364f8c2420d3 100644 (file)
--- a/cpu/o3/decode_impl.hh
+++ b/cpu/o3/decode_impl.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -39,7 +39,6 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
        decodeWidth(params->decodeWidth),
        numThreads(params->numberOfThreads)
  {
-    DPRINTF(Decode, "decodeWidth=%i.\n", decodeWidth);
      _status = Inactive;
  
      for (int i = 0; i < numThreads; ++i) {
@@ -249,8 +248,6 @@ template<class Impl>
  bool
  DefaultDecode<Impl>::unblock(unsigned tid)
  {
-    DPRINTF(Decode, "[tid:%u]: Trying to unblock.\n", tid);
-
      // Decode is done unblocking only if the skid buffer is empty.
      if (skidBuffer[tid].empty()) {
          DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
@@ -261,6 +258,8 @@ DefaultDecode<Impl>::unblock(unsigned tid)
          return true;
      }
  
+    DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
+
      return false;
  }
  
@@ -318,6 +317,7 @@ DefaultDecode<Impl>::squash(unsigned tid)
          // In syscall emulation, we can have both a block and a squash due
          // to a syscall in the same cycle.  This would cause both signals to
          // be high.  This shouldn't happen in full system.
+        // @todo: Determine if this still happens.
          if (toFetch->decodeBlock[tid]) {
              toFetch->decodeBlock[tid] = 0;
          } else {
@@ -372,7 +372,7 @@ DefaultDecode<Impl>::skidInsert(unsigned tid)
          skidBuffer[tid].push(inst);
      }
  
-    // Eventually need to enforce this by not letting a thread
+    // @todo: Eventually need to enforce this by not letting a thread
      // fetch past its skidbuffer
      assert(skidBuffer[tid].size() <= skidBufferMax);
  }
@@ -436,10 +436,10 @@ void
  DefaultDecode<Impl>::sortInsts()
  {
      int insts_from_fetch = fromFetch->size;
-
+#ifdef DEBUG
      for (int i=0; i < numThreads; i++)
          assert(insts[i].empty());
-
+#endif
      for (int i = 0; i < insts_from_fetch; ++i) {
          insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
      }
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh

index b03d4afe37fd15cf668e56b680e028e2f7094363..3fcfdc3a171fee9ca87d1fd05fab7966999222bb 100644 (file)
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -38,12 +38,12 @@
  class Sampler;
  
  /**
- * DefaultFetch class handles both single threaded and SMT fetch. Its width is
- * specified by the parameters; each cycle it tries to fetch that many
- * instructions. It supports using a branch predictor to predict direction and
- * targets.
- * It supports the idling functionalitiy of the CPU by indicating to the CPU
- * when it is active and inactive.
+ * DefaultFetch class handles both single threaded and SMT fetch. Its
+ * width is specified by the parameters; each cycle it tries to fetch
+ * that many instructions. It supports using a branch predictor to
+ * predict direction and targets.
+ * It supports the idling functionality of the CPU by indicating to
+ * the CPU when it is active and inactive.
   */
  template <class Impl>
  class DefaultFetch
@@ -66,8 +66,8 @@ class DefaultFetch
      typedef TheISA::ExtMachInst ExtMachInst;
  
    public:
-    /** Overall fetch status. Used to determine if the CPU can deschedule itsef
-     * due to a lack of activity.
+    /** Overall fetch status. Used to determine if the CPU can
+     * deschedule itself due to a lack of activity.
       */
      enum FetchStatus {
          Active,
@@ -174,13 +174,13 @@ class DefaultFetch
      void wakeFromQuiesce();
  
    private:
-    /** Changes the status of this stage to active, and indicates this to the
-     * CPU.
+    /** Changes the status of this stage to active, and indicates this
+     * to the CPU.
       */
      inline void switchToActive();
  
-    /** Changes the status of this stage to inactive, and indicates this to the
-     * CPU.
+    /** Changes the status of this stage to inactive, and indicates
+     * this to the CPU.
       */
      inline void switchToInactive();
  
@@ -373,11 +373,6 @@ class DefaultFetch
  
      bool switchedOut;
  
-  public:
-    InstSeqNum &getYoungestSN() { return youngestSN; }
-  private:
-    InstSeqNum youngestSN;
-
  #if !FULL_SYSTEM
      /** Page table pointer. */
  //    PageTable *pTable;
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh

index 523719945fd9d8ab32e20d8078fe329a64dd6951..1c5e508f6a9da4a7a66e104723307d3021fb2f5a 100644 (file)
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -938,10 +938,6 @@ DefaultFetch<Impl>::fetch(bool &status_change)
          DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "
                  "decode.\n",tid);
  
-        //////////////////////////
-        // Fetch first instruction
-        //////////////////////////
-
          // Need to keep track of whether or not a predicted branch
          // ended this fetch block.
          bool predicted_branch = false;
@@ -1004,7 +1000,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
              fetch_PC = next_PC;
  
              if (instruction->isQuiesce()) {
-                warn("%lli: Quiesce instruction encountered, halting fetch!", curTick);
+                warn("%lli: Quiesce instruction encountered, halting fetch!",
+                     curTick);
                  fetchStatus[tid] = QuiescePending;
                  ++numInst;
                  status_change = true;
@@ -1022,24 +1019,20 @@ DefaultFetch<Impl>::fetch(bool &status_change)
      // Now that fetching is completed, update the PC to signify what the next
      // cycle will be.
      if (fault == NoFault) {
-
          DPRINTF(Fetch, "[tid:%i]: Setting PC to %08p.\n",tid, next_PC);
  
-
          PC[tid] = next_PC;
          nextPC[tid] = next_PC + instSize;
      } else {
-        // If the issue was an icache miss, then we can just return and
-        // wait until it is handled.
+        // We shouldn't be in an icache miss and also have a fault (an ITB
+        // miss)
          if (fetchStatus[tid] == IcacheMissStall) {
              panic("Fetch should have exited prior to this!");
          }
  
-        // Handle the fault.
-        // This stage will not be able to continue until all the ROB
-        // slots are empty, at which point the fault can be handled.
-        // The only other way it can wake up is if a squash comes along
-        // and changes the PC.
+        // Send the fault to commit.  This thread will not do anything
+        // until commit handles the fault.  The only other way it can
+        // wake up is if a squash comes along and changes the PC.
  #if FULL_SYSTEM
          assert(numInst != fetchWidth);
          // Get a sequence number.
@@ -1067,20 +1060,12 @@ DefaultFetch<Impl>::fetch(bool &status_change)
          toDecode->insts[numInst] = instruction;
          toDecode->size++;
  
-        // Tell the commit stage the fault we had.
-//        toDecode->fetchFault = fault;
-//        toDecode->fetchFaultSN = cpu->globalSeqNum;
-
          DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid);
  
          fetchStatus[tid] = TrapPending;
          status_change = true;
  
          warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
-//        cpu->trap(fault);
-        // Send a signal to the ROB indicating that there's a trap from the
-        // fetch stage that needs to be handled.  Need to indicate that
-        // there's a fault, and the fault type.
  #else // !FULL_SYSTEM
          fatal("fault (%d) detected @ PC %08p", fault, PC[tid]);
  #endif // FULL_SYSTEM
diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh

index d5f893e576c1d9da485db4a62ce2192474cff11b..a1eeccbe744ae79ada108fc5787864e8b5ce3a10 100644 (file)
--- a/cpu/o3/lsq.hh
+++ b/cpu/o3/lsq.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -32,10 +32,9 @@
  #include <map>
  #include <queue>
  
-#include "base/hashmap.hh"
  #include "config/full_system.hh"
  #include "cpu/inst_seq.hh"
-#include "cpu/o3/cpu_policy.hh"
+//#include "cpu/o3/cpu_policy.hh"
  #include "cpu/o3/lsq_unit.hh"
  #include "mem/mem_interface.hh"
  //#include "mem/page_table.hh"
@@ -85,7 +84,8 @@ class LSQ {
      /** Ticks the LSQ. */
      void tick();
      /** Ticks a specific LSQ Unit. */
-    void tick(unsigned tid);
+    void tick(unsigned tid)
+    { thread[tid].tick(); }
  
      /** Inserts a load into the LSQ. */
      void insertLoad(DynInstPtr &load_inst);
@@ -95,18 +95,23 @@ class LSQ {
      /** Executes a load. */
      Fault executeLoad(DynInstPtr &inst);
  
-    Fault executeLoad(int lq_idx, unsigned tid);
+    Fault executeLoad(int lq_idx, unsigned tid)
+    { return thread[tid].executeLoad(lq_idx); }
+
      /** Executes a store. */
      Fault executeStore(DynInstPtr &inst);
  
      /**
       * Commits loads up until the given sequence number for a specific thread.
       */
-    void commitLoads(InstSeqNum &youngest_inst, unsigned tid);
+    void commitLoads(InstSeqNum &youngest_inst, unsigned tid)
+    { thread[tid].commitLoads(youngest_inst); }
+
      /**
       * Commits stores up until the given sequence number for a specific thread.
       */
-    void commitStores(InstSeqNum &youngest_inst, unsigned tid);
+    void commitStores(InstSeqNum &youngest_inst, unsigned tid)
+    { thread[tid].commitStores(youngest_inst); }
  
      /**
       * Attempts to write back stores until all cache ports are used or the
@@ -119,7 +124,8 @@ class LSQ {
      /**
       * Squash instructions from a thread until the specified sequence number.
       */
-    void squash(const InstSeqNum &squashed_num, unsigned tid);
+    void squash(const InstSeqNum &squashed_num, unsigned tid)
+    { thread[tid].squash(squashed_num); }
  
      /** Returns whether or not there was a memory ordering violation. */
      bool violation();
@@ -127,12 +133,14 @@ class LSQ {
       * Returns whether or not there was a memory ordering violation for a
       * specific thread.
       */
-    bool violation(unsigned tid);
+    bool violation(unsigned tid)
+    { return thread[tid].violation(); }
  
      /** Returns if a load is blocked due to the memory system for a specific
       *  thread.
       */
-    bool loadBlocked(unsigned tid);
+    bool loadBlocked(unsigned tid)
+    { return thread[tid].loadBlocked(); }
  
      bool isLoadBlockedHandled(unsigned tid)
      { return thread[tid].isLoadBlockedHandled(); }
@@ -141,10 +149,13 @@ class LSQ {
      { thread[tid].setLoadBlockedHandled(); }
  
      /** Gets the instruction that caused the memory ordering violation. */
-    DynInstPtr getMemDepViolator(unsigned tid);
+    DynInstPtr getMemDepViolator(unsigned tid)
+    { return thread[tid].getMemDepViolator(); }
  
      /** Returns the head index of the load queue for a specific thread. */
-    int getLoadHead(unsigned tid);
+    int getLoadHead(unsigned tid)
+    { return thread[tid].getLoadHead(); }
+
      /** Returns the sequence number of the head of the load queue. */
      InstSeqNum getLoadHeadSeqNum(unsigned tid)
      {
@@ -152,7 +163,9 @@ class LSQ {
      }
  
      /** Returns the head index of the store queue. */
-    int getStoreHead(unsigned tid);
+    int getStoreHead(unsigned tid)
+    { return thread[tid].getStoreHead(); }
+
      /** Returns the sequence number of the head of the store queue. */
      InstSeqNum getStoreHeadSeqNum(unsigned tid)
      {
@@ -162,22 +175,26 @@ class LSQ {
      /** Returns the number of instructions in all of the queues. */
      int getCount();
      /** Returns the number of instructions in the queues of one thread. */
-    int getCount(unsigned tid);
+    int getCount(unsigned tid)
+    { return thread[tid].getCount(); }
  
      /** Returns the total number of loads in the load queue. */
      int numLoads();
      /** Returns the total number of loads for a single thread. */
-    int numLoads(unsigned tid);
+    int numLoads(unsigned tid)
+    { return thread[tid].numLoads(); }
  
      /** Returns the total number of stores in the store queue. */
      int numStores();
      /** Returns the total number of stores for a single thread. */
-    int numStores(unsigned tid);
+    int numStores(unsigned tid)
+    { return thread[tid].numStores(); }
  
      /** Returns the total number of loads that are ready. */
      int numLoadsReady();
      /** Returns the number of loads that are ready for a single thread. */
-    int numLoadsReady(unsigned tid);
+    int numLoadsReady(unsigned tid)
+    { return thread[tid].numLoadsReady(); }
  
      /** Returns the number of free entries. */
      unsigned numFreeEntries();
@@ -215,24 +232,30 @@ class LSQ {
  
      /** Returns whether or not there are any stores to write back to memory. */
      bool hasStoresToWB();
+
      /** Returns whether or not a specific thread has any stores to write back
       * to memory.
       */
-    bool hasStoresToWB(unsigned tid);
+    bool hasStoresToWB(unsigned tid)
+    { return thread[tid].hasStoresToWB(); }
+
      /** Returns the number of stores a specific thread has to write back. */
-    int  numStoresToWB(unsigned tid);
+    int  numStoresToWB(unsigned tid)
+    { return thread[tid].numStoresToWB(); }
  
      /** Returns if the LSQ will write back to memory this cycle. */
      bool willWB();
      /** Returns if the LSQ of a specific thread will write back to memory this
       * cycle.
       */
-    bool willWB(unsigned tid);
+    bool willWB(unsigned tid)
+    { return thread[tid].willWB(); }
  
      /** Debugging function to print out all instructions. */
      void dumpInsts();
      /** Debugging function to print out instructions from a specific thread. */
-    void dumpInsts(unsigned tid);
+    void dumpInsts(unsigned tid)
+    { thread[tid].dumpInsts(); }
  
      /** Executes a read operation, using the load specified at the load index. */
      template <class T>
diff --git a/cpu/o3/lsq_impl.hh b/cpu/o3/lsq_impl.hh

index c43c1961986efb79e9d1bb6e728b29ea52ec7b8a..a6ad275227bdb62128087fc11646583e748e0fd3 100644 (file)
--- a/cpu/o3/lsq_impl.hh
+++ b/cpu/o3/lsq_impl.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -26,6 +26,9 @@
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
+#include <algorithm>
+#include <string>
+
  #include "cpu/o3/lsq.hh"
  
  using namespace std;
@@ -89,7 +92,7 @@ LSQ<Impl>::LSQ(Params *params)
  
      //Initialize LSQs
      for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].init(params, maxLQEntries+1, maxSQEntries+1, tid);
+        thread[tid].init(params, maxLQEntries, maxSQEntries, tid);
      }
  }
  
@@ -226,13 +229,6 @@ LSQ<Impl>::tick()
      }
  }
  
-template<class Impl>
-void
-LSQ<Impl>::tick(unsigned tid)
-{
-    thread[tid].tick();
-}
-
  template<class Impl>
  void
  LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
@@ -260,13 +256,6 @@ LSQ<Impl>::executeLoad(DynInstPtr &inst)
      return thread[tid].executeLoad(inst);
  }
  
-template<class Impl>
-Fault
-LSQ<Impl>::executeLoad(int lq_idx, unsigned tid)
-{
-    return thread[tid].executeLoad(lq_idx);
-}
-
  template<class Impl>
  Fault
  LSQ<Impl>::executeStore(DynInstPtr &inst)
@@ -276,20 +265,6 @@ LSQ<Impl>::executeStore(DynInstPtr &inst)
      return thread[tid].executeStore(inst);
  }
  
-template<class Impl>
-void
-LSQ<Impl>::commitLoads(InstSeqNum &youngest_inst,unsigned tid)
-{
-    thread[tid].commitLoads(youngest_inst);
-}
-
-template<class Impl>
-void
-LSQ<Impl>::commitStores(InstSeqNum &youngest_inst,unsigned tid)
-{
-    thread[tid].commitStores(youngest_inst);
-}
-
  template<class Impl>
  void
  LSQ<Impl>::writebackStores()
@@ -300,28 +275,14 @@ LSQ<Impl>::writebackStores()
          unsigned tid = *active_threads++;
  
          if (numStoresToWB(tid) > 0) {
-            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores available"
-                " for Writeback.\n", tid, numStoresToWB(tid));
+            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
+                "available for Writeback.\n", tid, numStoresToWB(tid));
          }
  
          thread[tid].writebackStores();
      }
  }
  
-template<class Impl>
-int
-LSQ<Impl>::numStoresToWB(unsigned tid)
-{
-    return thread[tid].numStoresToWB();
-}
-
-template<class Impl>
-void
-LSQ<Impl>::squash(const InstSeqNum &squashed_num, unsigned tid)
-{
-        thread[tid].squash(squashed_num);
-}
-
  template<class Impl>
  bool
  LSQ<Impl>::violation()
@@ -338,41 +299,6 @@ LSQ<Impl>::violation()
      return false;
  }
  
-template<class Impl>
-bool
-LSQ<Impl>::violation(unsigned tid)
-{
-    return thread[tid].violation();
-}
-
-template<class Impl>
-bool
-LSQ<Impl>::loadBlocked(unsigned tid)
-{
-    return thread[tid].loadBlocked();
-}
-
-template<class Impl>
-typename Impl::DynInstPtr
-LSQ<Impl>::getMemDepViolator(unsigned tid)
-{
-    return thread[tid].getMemDepViolator();
-}
-
-template<class Impl>
-int
-LSQ<Impl>::getLoadHead(unsigned tid)
-{
-    return thread[tid].getLoadHead();
-}
-
-template<class Impl>
-int
-LSQ<Impl>::getStoreHead(unsigned tid)
-{
-    return thread[tid].getStoreHead();
-}
-
  template<class Impl>
  int
  LSQ<Impl>::getCount()
@@ -389,13 +315,6 @@ LSQ<Impl>::getCount()
      return total;
  }
  
-template<class Impl>
-int
-LSQ<Impl>::getCount(unsigned tid)
-{
-    return thread[tid].getCount();
-}
-
  template<class Impl>
  int
  LSQ<Impl>::numLoads()
@@ -412,13 +331,6 @@ LSQ<Impl>::numLoads()
      return total;
  }
  
-template<class Impl>
-int
-LSQ<Impl>::numLoads(unsigned tid)
-{
-    return thread[tid].numLoads();
-}
-
  template<class Impl>
  int
  LSQ<Impl>::numStores()
@@ -435,13 +347,6 @@ LSQ<Impl>::numStores()
      return total;
  }
  
-template<class Impl>
-int
-LSQ<Impl>::numStores(unsigned tid)
-{
-    return thread[tid].numStores();
-}
-
  template<class Impl>
  int
  LSQ<Impl>::numLoadsReady()
@@ -458,13 +363,6 @@ LSQ<Impl>::numLoadsReady()
      return total;
  }
  
-template<class Impl>
-int
-LSQ<Impl>::numLoadsReady(unsigned tid)
-{
-    return thread[tid].numLoadsReady();
-}
-
  template<class Impl>
  unsigned
  LSQ<Impl>::numFreeEntries()
@@ -612,14 +510,6 @@ LSQ<Impl>::hasStoresToWB()
      return true;
  }
  
-
-template<class Impl>
-bool
-LSQ<Impl>::hasStoresToWB(unsigned tid)
-{
-    return thread[tid].hasStoresToWB();
-}
-
  template<class Impl>
  bool
  LSQ<Impl>::willWB()
@@ -635,13 +525,6 @@ LSQ<Impl>::willWB()
      return true;
  }
  
-template<class Impl>
-bool
-LSQ<Impl>::willWB(unsigned tid)
-{
-    return thread[tid].willWB();
-}
-
  template<class Impl>
  void
  LSQ<Impl>::dumpInsts()
@@ -653,10 +536,3 @@ LSQ<Impl>::dumpInsts()
          thread[tid].dumpInsts();
      }
  }
-
-template<class Impl>
-void
-LSQ<Impl>::dumpInsts(unsigned tid)
-{
-    thread[tid].dumpInsts();
-}
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh

index 623dbdb4bb7c70217fd8133d8688b485067f64fb..942b4583d8303088c352c8da2f805d0c897e349f 100644 (file)
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -29,29 +29,30 @@
  #ifndef __CPU_O3_LSQ_UNIT_HH__
  #define __CPU_O3_LSQ_UNIT_HH__
  
+#include <algorithm>
  #include <map>
  #include <queue>
-#include <algorithm>
  
+#include "arch/faults.hh"
  #include "config/full_system.hh"
  #include "base/hashmap.hh"
  #include "cpu/inst_seq.hh"
  #include "mem/mem_interface.hh"
  //#include "mem/page_table.hh"
-#include "sim/debug.hh"
-#include "sim/sim_object.hh"
-#include "arch/faults.hh"
+//#include "sim/debug.hh"
+//#include "sim/sim_object.hh"
  
  /**
- * Class that implements the actual LQ and SQ for each specific thread.
- * Both are circular queues; load entries are freed upon committing, while
- * store entries are freed once they writeback. The LSQUnit tracks if there
- * are memory ordering violations, and also detects partial load to store
- * forwarding cases (a store only has part of a load's data) that requires
- * the load to wait until the store writes back. In the former case it
- * holds onto the instruction until the dependence unit looks at it, and
- * in the latter it stalls the LSQ until the store writes back. At that
- * point the load is replayed.
+ * Class that implements the actual LQ and SQ for each specific
+ * thread.  Both are circular queues; load entries are freed upon
+ * committing, while store entries are freed once they writeback. The
+ * LSQUnit tracks if there are memory ordering violations, and also
+ * detects partial load to store forwarding cases (a store only has
+ * part of a load's data) that requires the load to wait until the
+ * store writes back. In the former case it holds onto the instruction
+ * until the dependence unit looks at it, and in the latter it stalls
+ * the LSQ until the store writes back. At that point the load is
+ * replayed.
   */
  template <class Impl>
  class LSQUnit {
@@ -76,21 +77,19 @@ class LSQUnit {
          /** Returns the description of this event. */
          const char *description();
  
-      private:
-        /** The store index of the store being written back. */
-        int storeIdx;
          /** The writeback event for the store.  Needed for store
           * conditionals.
           */
-      public:
          Event *wbEvent;
+
+      private:
+        /** The store index of the store being written back. */
+        int storeIdx;
        private:
          /** The pointer to the LSQ unit that issued the store. */
          LSQUnit<Impl> *lsqPtr;
      };
  
-    friend class StoreCompletionEvent;
-
    public:
      /** Constructs an LSQ unit. init() must be called prior to use. */
      LSQUnit();
@@ -136,14 +135,12 @@ class LSQUnit {
      /** Executes a load instruction. */
      Fault executeLoad(DynInstPtr &inst);
  
-    Fault executeLoad(int lq_idx);
+    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
      /** Executes a store instruction. */
      Fault executeStore(DynInstPtr &inst);
  
      /** Commits the head load. */
      void commitLoad();
-    /** Commits a specific load, given by the sequence number. */
-    void commitLoad(InstSeqNum &inst);
      /** Commits loads older than a specific sequence number. */
      void commitLoads(InstSeqNum &youngest_inst);
  
@@ -179,9 +176,7 @@ class LSQUnit {
      /** Returns the memory ordering violator. */
      DynInstPtr getMemDepViolator();
  
-    /** Returns if a load became blocked due to the memory system.  It clears
-     *  the bool's value upon this being called.
-     */
+    /** Returns if a load became blocked due to the memory system. */
      bool loadBlocked()
      { return isLoadBlocked; }
  
@@ -215,9 +210,6 @@ class LSQUnit {
      /** Returns if the SQ is full. */
      bool sqFull() { return stores >= (SQEntries - 1); }
  
-    /** Debugging function to dump instructions in the LSQ. */
-    void dumpInsts();
-
      /** Returns the number of instructions in the LSQ. */
      unsigned getCount() { return loads + stores; }
  
@@ -245,6 +237,10 @@ class LSQUnit {
      /** Decrements the given load index (circular queue). */
      inline void decrLdIdx(int &load_idx);
  
+  public:
+    /** Debugging function to dump instructions in the LSQ. */
+    void dumpInsts();
+
    private:
      /** Pointer to the CPU. */
      FullCPU *cpu;
@@ -287,38 +283,29 @@ class LSQUnit {
          /** Whether or not the store is completed. */
          bool completed;
      };
-/*
-    enum Status {
-        Running,
-        Idle,
-        DcacheMissStall,
-        DcacheMissSwitch
-    };
-*/
+
    private:
      /** The LSQUnit thread id. */
      unsigned lsqID;
  
-    /** The status of the LSQ unit. */
-//    Status _status;
-
      /** The store queue. */
      std::vector<SQEntry> storeQueue;
  
      /** The load queue. */
      std::vector<DynInstPtr> loadQueue;
  
-    // Consider making these 16 bits
-    /** The number of LQ entries. */
+    /** The number of LQ entries, plus a sentinel entry (circular queue).
+     *  @todo: Consider having var that records the true number of LQ entries.
+     */
      unsigned LQEntries;
-    /** The number of SQ entries. */
+    /** The number of SQ entries, plus a sentinel entry (circular queue).
+     *  @todo: Consider having var that records the true number of SQ entries.
+     */
      unsigned SQEntries;
  
      /** The number of load instructions in the LQ. */
      int loads;
-    /** The number of store instructions in the SQ (excludes those waiting to
-     * writeback).
-     */
+    /** The number of store instructions in the SQ. */
      int stores;
      /** The number of store instructions in the SQ waiting to writeback. */
      int storesToWB;
@@ -330,8 +317,8 @@ class LSQUnit {
  
      /** The index of the head instruction in the SQ. */
      int storeHead;
-    /** The index of the first instruction that is ready to be written back,
-     * and has not yet been written back.
+    /** The index of the first instruction that may be ready to be
+     * written back, and has not yet been written back.
       */
      int storeWBIdx;
      /** The index of the tail instruction in the SQ. */
@@ -348,13 +335,9 @@ class LSQUnit {
  
      //list<InstSeqNum> mshrSeqNums;
  
-     //Stats::Scalar<> dcacheStallCycles;
-    Counter lastDcacheStall;
-
      /** Wire to read information from the issue stage time queue. */
      typename TimeBuffer<IssueStruct>::wire fromIssue;
  
-    // Make these per thread?
      /** Whether or not the LSQ is stalled. */
      bool stalled;
      /** The store that causes the stall due to partial store to load
@@ -364,20 +347,13 @@ class LSQUnit {
      /** The index of the above store. */
      int stallingLoadIdx;
  
-    /** Whether or not a load is blocked due to the memory system.  It is
-     *  cleared when this value is checked via loadBlocked().
-     */
+    /** Whether or not a load is blocked due to the memory system. */
      bool isLoadBlocked;
  
      bool loadBlockedHandled;
  
      InstSeqNum blockedLoadSeqNum;
  
-    /** The oldest faulting load instruction. */
-    DynInstPtr loadFaultInst;
-    /** The oldest faulting store instruction. */
-    DynInstPtr storeFaultInst;
-
      /** The oldest load that caused a memory ordering violation. */
      DynInstPtr memDepViolator;
  
@@ -447,23 +423,14 @@ template <class T>
  Fault
  LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
  {
-    //Depending on issue2execute delay a squashed load could
-    //execute if it is found to be squashed in the same
-    //cycle it is scheduled to execute
      assert(loadQueue[load_idx]);
  
-    if (loadQueue[load_idx]->isExecuted()) {
-        panic("Should not reach this point with split ops!");
-        memcpy(&data,req->data,req->size);
-
-        return NoFault;
-    }
+    assert(!loadQueue[load_idx]->isExecuted());
  
      // Make sure this isn't an uncacheable access
      // A bit of a hackish way to get uncached accesses to work only if they're
      // at the head of the LSQ and are ready to commit (at the head of the ROB
      // too).
-    // @todo: Fix uncached accesses.
      if (req->flags & UNCACHEABLE &&
          (load_idx != loadHead || !loadQueue[load_idx]->reachedCommit)) {
          iewStage->rescheduleMemInst(loadQueue[load_idx]);
@@ -479,12 +446,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
              "storeHead: %i addr: %#x\n",
              load_idx, store_idx, storeHead, req->paddr);
  
-#ifdef FULL_SYSTEM
+#if 0
      if (req->flags & LOCKED) {
          cpu->lockAddr = req->paddr;
          cpu->lockFlag = true;
      }
  #endif
+            req->cmd = Read;
+            assert(!req->completionEvent);
+            req->completionEvent = NULL;
+            req->time = curTick;
  
      while (store_idx != -1) {
          // End once we've reached the top of the LSQ
@@ -518,18 +489,14 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
  
          // If the store's data has all of the data needed, we can forward.
          if (store_has_lower_limit && store_has_upper_limit) {
-
+            // Get shift amount for offset into the store's data.
              int shift_amt = req->vaddr & (store_size - 1);
-            // Assumes byte addressing
+            // @todo: Magic number, assumes byte addressing
              shift_amt = shift_amt << 3;
  
              // Cast this to type T?
              data = storeQueue[store_idx].data >> shift_amt;
  
-            req->cmd = Read;
-            assert(!req->completionEvent);
-            req->completionEvent = NULL;
-            req->time = curTick;
              assert(!req->data);
              req->data = new uint8_t[64];
  
@@ -579,7 +546,6 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
  
              // Do not generate a writeback event as this instruction is not
              // complete.
-
              DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
                      "Store idx %i to load addr %#x\n",
                      store_idx, req->vaddr);
@@ -588,16 +554,13 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
          }
      }
  
-
      // If there's no forwarding case, then go access memory
      DynInstPtr inst = loadQueue[load_idx];
  
-    DPRINTF(LSQUnit, "Doing functional access for inst PC %#x\n",
-            loadQueue[load_idx]->readPC());
+    DPRINTF(LSQUnit, "Doing functional access for inst [sn:%lli] PC %#x\n",
+            loadQueue[load_idx]->seqNum, loadQueue[load_idx]->readPC());
+
      assert(!req->data);
-    req->cmd = Read;
-    req->completionEvent = NULL;
-    req->time = curTick;
      req->data = new uint8_t[64];
      Fault fault = cpu->read(req, data);
      memcpy(req->data, &data, sizeof(T));
@@ -611,20 +574,19 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
              if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
                  return NoFault;
  
+            // Record that the load was blocked due to memory.  This
+            // load will squash all instructions after it, be
+            // refetched, and re-executed.
              isLoadBlocked = true;
              loadBlockedHandled = false;
              blockedLoadSeqNum = inst->seqNum;
              // No fault occurred, even though the interface is blocked.
              return NoFault;
          }
+
          DPRINTF(LSQUnit, "Doing timing access for inst PC %#x\n",
                  loadQueue[load_idx]->readPC());
-/*
-        Addr debug_addr = ULL(0xfffffc0000be81a8);
-        if (req->vaddr == debug_addr) {
-            debug_break();
-        }
-*/
+
          assert(!req->completionEvent);
          req->completionEvent =
              new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
@@ -632,75 +594,16 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
  
          assert(dcacheInterface->doEvents());
  
-        // Ugly hack to get an event scheduled *only* if the access is
-        // a miss.  We really should add first-class support for this
-        // at some point.
          if (result != MA_HIT) {
              DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
              DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
                      inst->seqNum);
-
-            lastDcacheStall = curTick;
-
-//            _status = DcacheMissStall;
-
          } else {
-            DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
-                    inst->seqNum);
-
              DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
-        }
-    }
-#if 0
-    // if we have a cache, do cache access too
-    if (dcacheInterface) {
-        if (dcacheInterface->isBlocked()) {
-            isLoadBlocked = true;
-            // No fault occurred, even though the interface is blocked.
-            return NoFault;
-        }
-
-        DPRINTF(LSQUnit, "LSQUnit: D-cache: PC:%#x reading from paddr:%#x "
-                "vaddr:%#x flags:%i\n",
-                inst->readPC(), req->paddr, req->vaddr, req->flags);
-
-        // Setup MemReq pointer
-        req->cmd = Read;
-        req->completionEvent = NULL;
-        req->time = curTick;
-        assert(!req->data);
-        req->data = new uint8_t[64];
-
-        assert(!req->completionEvent);
-        req->completionEvent =
-            new typename IEW::LdWritebackEvent(loadQueue[load_idx], iewStage);
-
-        // Do Cache Access
-        MemAccessResult result = dcacheInterface->access(req);
-
-        // Ugly hack to get an event scheduled *only* if the access is
-        // a miss.  We really should add first-class support for this
-        // at some point.
-        // @todo: Probably should support having no events
-        if (result != MA_HIT) {
-            DPRINTF(LSQUnit, "LSQUnit: D-cache miss!\n");
-            DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
-                    inst->seqNum);
-
-            lastDcacheStall = curTick;
-
-            _status = DcacheMissStall;
-
-        } else {
              DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
                      inst->seqNum);
-
-            DPRINTF(LSQUnit, "LSQUnit: D-cache hit!\n");
          }
-    } else {
-        fatal("Must use D-cache with new memory system");
      }
-#endif
  
      return fault;
  }
@@ -716,24 +619,11 @@ LSQUnit<Impl>::write(MemReqPtr &req, T &data, int store_idx)
              " | storeHead:%i [sn:%i]\n",
              store_idx, req->paddr, data, storeHead,
              storeQueue[store_idx].inst->seqNum);
-/*
-    if (req->flags & LOCKED) {
-        if (req->flags & UNCACHEABLE) {
-            req->result = 2;
-        } else {
-            req->result = 1;
-        }
-    }
-*/
+
      storeQueue[store_idx].req = req;
      storeQueue[store_idx].size = sizeof(T);
      storeQueue[store_idx].data = data;
-/*
-    Addr debug_addr = ULL(0xfffffc0000be81a8);
-    if (req->vaddr == debug_addr) {
-        debug_break();
-    }
-*/
+
      // This function only writes the data to the store queue, so no fault
      // can happen here.
      return NoFault;
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh

index dca808ac94c1c760253831c508acf2bcf772d848..f0b4405ede71bf46f1c54bd21cc911169aee1222 100644 (file)
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -35,8 +35,8 @@ LSQUnit<Impl>::StoreCompletionEvent::StoreCompletionEvent(int store_idx,
                                                            Event *wb_event,
                                                            LSQUnit<Impl> *lsq_ptr)
      : Event(&mainEventQueue),
-      storeIdx(store_idx),
        wbEvent(wb_event),
+      storeIdx(store_idx),
        lsqPtr(lsq_ptr)
  {
      this->setFlags(Event::AutoDelete);
@@ -86,15 +86,13 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
  
      lsqID = id;
  
-    LQEntries = maxLQEntries;
-    SQEntries = maxSQEntries;
+    // Add 1 for the sentinel entry (they are circular queues).
+    LQEntries = maxLQEntries + 1;
+    SQEntries = maxSQEntries + 1;
  
      loadQueue.resize(LQEntries);
      storeQueue.resize(SQEntries);
  
-
-    // May want to initialize these entries to NULL
-
      loadHead = loadTail = 0;
  
      storeHead = storeWBIdx = storeTail = 0;
@@ -104,7 +102,7 @@ LSQUnit<Impl>::init(Params *params, unsigned maxLQEntries,
  
      dcacheInterface = params->dcacheInterface;
  
-    loadFaultInst = storeFaultInst = memDepViolator = NULL;
+    memDepViolator = NULL;
  
      blockedLoadSeqNum = 0;
  }
@@ -152,6 +150,8 @@ LSQUnit<Impl>::switchOut()
      for (int i = 0; i < loadQueue.size(); ++i)
          loadQueue[i] = NULL;
  
+    assert(storesToWB == 0);
+
      while (storesToWB > 0 &&
             storeWBIdx != storeTail &&
             storeQueue[storeWBIdx].inst &&
@@ -218,7 +218,7 @@ LSQUnit<Impl>::takeOverFrom()
  
      usedPorts = 0;
  
-    loadFaultInst = storeFaultInst = memDepViolator = NULL;
+    memDepViolator = NULL;
  
      blockedLoadSeqNum = 0;
  
@@ -231,16 +231,17 @@ template<class Impl>
  void
  LSQUnit<Impl>::resizeLQ(unsigned size)
  {
-    assert( size >= LQEntries);
+    unsigned size_plus_sentinel = size + 1;
+    assert(size_plus_sentinel >= LQEntries);
  
-    if (size > LQEntries) {
-        while (size > loadQueue.size()) {
+    if (size_plus_sentinel > LQEntries) {
+        while (size_plus_sentinel > loadQueue.size()) {
              DynInstPtr dummy;
              loadQueue.push_back(dummy);
              LQEntries++;
          }
      } else {
-        LQEntries = size;
+        LQEntries = size_plus_sentinel;
      }
  
  }
@@ -249,14 +250,15 @@ template<class Impl>
  void
  LSQUnit<Impl>::resizeSQ(unsigned size)
  {
-    if (size > SQEntries) {
-        while (size > storeQueue.size()) {
+    unsigned size_plus_sentinel = size + 1;
+    if (size_plus_sentinel > SQEntries) {
+        while (size_plus_sentinel > storeQueue.size()) {
              SQEntry dummy;
              storeQueue.push_back(dummy);
              SQEntries++;
          }
      } else {
-        SQEntries = size;
+        SQEntries = size_plus_sentinel;
      }
  }
  
@@ -264,10 +266,8 @@ template <class Impl>
  void
  LSQUnit<Impl>::insert(DynInstPtr &inst)
  {
-    // Make sure we really have a memory reference.
      assert(inst->isMemRef());
  
-    // Make sure it's one of the two classes of memory references.
      assert(inst->isLoad() || inst->isStore());
  
      if (inst->isLoad()) {
@@ -283,7 +283,8 @@ template <class Impl>
  void
  LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
  {
-    assert((loadTail + 1) % LQEntries != loadHead && loads < LQEntries);
+    assert((loadTail + 1) % LQEntries != loadHead);
+    assert(loads < LQEntries);
  
      DPRINTF(LSQUnit, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
              load_inst->readPC(), loadTail, load_inst->seqNum);
@@ -322,7 +323,6 @@ LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
      incrStIdx(storeTail);
  
      ++stores;
-
  }
  
  template <class Impl>
@@ -370,39 +370,6 @@ LSQUnit<Impl>::numLoadsReady()
      return retval;
  }
  
-#if 0
-template <class Impl>
-Fault
-LSQUnit<Impl>::executeLoad()
-{
-    Fault load_fault = NoFault;
-    DynInstPtr load_inst;
-
-    assert(readyLoads.size() != 0);
-
-    // Execute a ready load.
-    LdMapIt ready_it = readyLoads.begin();
-
-    load_inst = (*ready_it).second;
-
-    // Execute the instruction, which is held in the data portion of the
-    // iterator.
-    load_fault = load_inst->execute();
-
-    // If it executed successfully, then switch it over to the executed
-    // loads list.
-    if (load_fault == NoFault) {
-        executedLoads[load_inst->seqNum] = load_inst;
-
-        readyLoads.erase(ready_it);
-    } else {
-        loadFaultInst = load_inst;
-    }
-
-    return load_fault;
-}
-#endif
-
  template <class Impl>
  Fault
  LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
@@ -413,33 +380,14 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
      DPRINTF(LSQUnit, "Executing load PC %#x, [sn:%lli]\n",
              inst->readPC(),inst->seqNum);
  
-    // Make sure it's really in the list.
-    // Normally it should always be in the list.  However,
-    /* due to a syscall it may not be the list.
-#ifdef DEBUG
-    int i = loadHead;
-    while (1) {
-        if (i == loadTail && !find(inst)) {
-            assert(0 && "Load not in the queue!");
-        } else if (loadQueue[i] == inst) {
-            break;
-        }
-
-        i = i + 1;
-        if (i >= LQEntries) {
-            i = 0;
-        }
-    }
-#endif // DEBUG*/
-
  //    load_fault = inst->initiateAcc();
      load_fault = inst->execute();
  
      // If the instruction faulted, then we need to send it along to commit
      // without the instruction completing.
      if (load_fault != NoFault) {
-        // Maybe just set it as can commit here, although that might cause
-        // some other problems with sending traps to the ROB too quickly.
+        // Send this instruction to commit, and also make sure the IEW
+        // stage realizes there is activity.
          iewStage->instToCommit(inst);
          iewStage->activityThisCycle();
      }
@@ -447,20 +395,6 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
      return load_fault;
  }
  
-template <class Impl>
-Fault
-LSQUnit<Impl>::executeLoad(int lq_idx)
-{
-    // Very hackish.  Not sure the best way to check that this
-    // instruction is at the head of the ROB.  I should have some sort
-    // of extra information here so that I'm not overloading the
-    // canCommit signal for 15 different things.
-    loadQueue[lq_idx]->setCanCommit();
-    Fault ret_fault = executeLoad(loadQueue[lq_idx]);
-    loadQueue[lq_idx]->clearCanCommit();
-    return ret_fault;
-}
-
  template <class Impl>
  Fault
  LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
@@ -481,11 +415,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
      Fault store_fault = store_inst->initiateAcc();
  //    Fault store_fault = store_inst->execute();
  
-    // Store size should now be available.  Use it to get proper offset for
-    // addr comparisons.
-    int size = storeQueue[store_idx].size;
-
-    if (size == 0) {
+    if (storeQueue[store_idx].size == 0) {
          DPRINTF(LSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
                  store_inst->readPC(),store_inst->seqNum);
  
@@ -494,30 +424,25 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
  
      assert(store_fault == NoFault);
  
-    if (!storeFaultInst) {
-        if (store_fault != NoFault) {
-            panic("Fault in a store instruction!");
-            storeFaultInst = store_inst;
-        } else if (store_inst->isNonSpeculative()) {
-            // Nonspeculative accesses (namely store conditionals)
-            // need to set themselves as able to writeback if we
-            // haven't had a fault by here.
-            storeQueue[store_idx].canWB = true;
+    if (store_inst->isNonSpeculative()) {
+        // Nonspeculative accesses (namely store conditionals)
+        // need to set themselves as able to writeback if we
+        // haven't had a fault by here.
+        storeQueue[store_idx].canWB = true;
  
-            ++storesToWB;
-        }
+        ++storesToWB;
      }
  
      if (!memDepViolator) {
          while (load_idx != loadTail) {
-            // Actually should only check loads that have actually executed
-            // Might be safe because effAddr is set to InvalAddr when the
-            // dyn inst is created.
-
-            // Must actually check all addrs in the proper size range
-            // Which is more correct than needs to be.  What if for now we just
-            // assume all loads are quad-word loads, and do the addr based
-            // on that.
+            // Really only need to check loads that have actually executed.
+            // It's safe to check all loads because effAddr is set to
+            // InvalAddr when the dyn inst is created.
+
+            // @todo: For now this is extra conservative, detecting a
+            // violation if the addresses match after a right shift by 8,
+            // which is even coarser than assuming quad word accesses.
+
              // @todo: Fix this, magic number being used here
              if ((loadQueue[load_idx]->effAddr >> 8) ==
                  (store_inst->effAddr >> 8)) {
@@ -555,32 +480,6 @@ LSQUnit<Impl>::commitLoad()
      --loads;
  }
  
-template <class Impl>
-void
-LSQUnit<Impl>::commitLoad(InstSeqNum &inst)
-{
-    // Hopefully I don't use this function too much
-    panic("Don't use this function!");
-
-    int i = loadHead;
-    while (1) {
-        if (i == loadTail) {
-            assert(0 && "Load not in the queue!");
-        } else if (loadQueue[i]->seqNum == inst) {
-            break;
-        }
-
-        ++i;
-        if (i >= LQEntries) {
-            i = 0;
-        }
-    }
-
-    loadQueue[i]->removeInLSQ();
-    loadQueue[i] = NULL;
-    --loads;
-}
-
  template <class Impl>
  void
  LSQUnit<Impl>::commitLoads(InstSeqNum &youngest_inst)
@@ -602,6 +501,8 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
  
      while (store_idx != storeTail) {
          assert(storeQueue[store_idx].inst);
+        // Mark any stores that are now committed and have not yet
+        // been marked as able to write back.
          if (!storeQueue[store_idx].canWB) {
              if (storeQueue[store_idx].inst->seqNum > youngest_inst) {
                  break;
@@ -613,7 +514,6 @@ LSQUnit<Impl>::commitStores(InstSeqNum &youngest_inst)
  
              storeQueue[store_idx].canWB = true;
  
-//            --stores;
              ++storesToWB;
          }
  
@@ -631,6 +531,8 @@ LSQUnit<Impl>::writebackStores()
             storeQueue[storeWBIdx].canWB &&
             usedPorts < cachePorts) {
  
+        // Store didn't write any data so no need to write it back to
+        // memory.
          if (storeQueue[storeWBIdx].size == 0) {
              completeStore(storeWBIdx);
  
@@ -659,7 +561,6 @@ LSQUnit<Impl>::writebackStores()
          MemReqPtr req = storeQueue[storeWBIdx].req;
          storeQueue[storeWBIdx].committed = true;
  
-//     Fault fault = cpu->translateDataWriteReq(req);
          req->cmd = Write;
          req->completionEvent = NULL;
          req->time = curTick;
@@ -689,6 +590,12 @@ LSQUnit<Impl>::writebackStores()
            default:
              panic("Unexpected store size!\n");
          }
+
+        // Stores other than store conditionals are completed at this
+        // time.  Mark them as completed and, if we have a checker,
+        // tell it that the instruction is completed.
+        // @todo: Figure out what time I can say stores are complete in
+        // the timing memory.
          if (!(req->flags & LOCKED)) {
              storeQueue[storeWBIdx].inst->setCompleted();
              if (cpu->checker) {
@@ -714,57 +621,35 @@ LSQUnit<Impl>::writebackStores()
                  iewStage->replayMemInst(loadQueue[stallingLoadIdx]);
              }
  
-            if (result != MA_HIT && dcacheInterface->doEvents()) {
-                typename IEW::LdWritebackEvent *wb = NULL;
-                if (req->flags & LOCKED) {
-                    // Stx_C should not generate a system port transaction,
-                    // but that might be hard to accomplish.
-                    wb = new typename
-                        IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
+            typename IEW::LdWritebackEvent *wb = NULL;
+            if (req->flags & LOCKED) {
+                // Stx_C should not generate a system port transaction
+                // if it misses in the cache, but that might be hard
+                // to accomplish without explicit cache support.
+                wb = new typename
+                    IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
                                                iewStage);
-                    store_event->wbEvent = wb;
-                }
+                store_event->wbEvent = wb;
+            }
  
-                DPRINTF(LSQUnit,"D-Cache Write Miss!\n");
+            if (result != MA_HIT && dcacheInterface->doEvents()) {
+                DPRINTF(LSQUnit,"D-Cache Write Miss on idx:%i!\n",
+                        storeWBIdx);
  
                  DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
                          storeQueue[storeWBIdx].inst->seqNum);
  
-                lastDcacheStall = curTick;
-
-//                _status = DcacheMissStall;
-
                  //mshrSeqNums.push_back(storeQueue[storeWBIdx].inst->seqNum);
  
                  //DPRINTF(LSQUnit, "Added MSHR. count = %i\n",mshrSeqNums.size());
  
-                // Increment stat here or something
+                // @todo: Increment stat here.
              } else {
                  DPRINTF(LSQUnit,"D-Cache: Write Hit on idx:%i !\n",
                          storeWBIdx);
  
                  DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
                          storeQueue[storeWBIdx].inst->seqNum);
-
-
-                if (req->flags & LOCKED) {
-                    // Stx_C does not generate a system port transaction.
-/*
-                    if (req->flags & UNCACHEABLE) {
-                        req->result = 2;
-                    } else {
-                        if (cpu->lockFlag && cpu->lockAddr == req->paddr) {
-                            req->result=1;
-                        } else {
-                            req->result = 0;
-                        }
-                    }
-*/
-                    typename IEW::LdWritebackEvent *wb =
-                        new typename IEW::LdWritebackEvent(storeQueue[storeWBIdx].inst,
-                                                           iewStage);
-                    store_event->wbEvent = wb;
-                }
              }
  
              incrStIdx(storeWBIdx);
@@ -798,14 +683,12 @@ void
  LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
  {
      DPRINTF(LSQUnit, "Squashing until [sn:%lli]!"
-            "(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
+            "(Loads:%i Stores:%i)\n", squashed_num, loads, stores);
  
      int load_idx = loadTail;
      decrLdIdx(load_idx);
  
      while (loads != 0 && loadQueue[load_idx]->seqNum > squashed_num) {
-
-        // Clear the smart pointer to make sure it is decremented.
          DPRINTF(LSQUnit,"Load Instruction PC %#x squashed, "
                  "[sn:%lli]\n",
                  loadQueue[load_idx]->readPC(),
@@ -817,6 +700,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
              stallingLoadIdx = 0;
          }
  
+        // Clear the smart pointer to make sure it is decremented.
          loadQueue[load_idx]->squashed = true;
          loadQueue[load_idx] = NULL;
          --loads;
@@ -840,19 +724,18 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
  
      while (stores != 0 &&
             storeQueue[store_idx].inst->seqNum > squashed_num) {
-
+        // Stores marked as canWB have already been committed.
          if (storeQueue[store_idx].canWB) {
              break;
          }
  
-        // Clear the smart pointer to make sure it is decremented.
          DPRINTF(LSQUnit,"Store Instruction PC %#x squashed, "
                  "idx:%i [sn:%lli]\n",
                  storeQueue[store_idx].inst->readPC(),
                  store_idx, storeQueue[store_idx].inst->seqNum);
  
-        // I don't think this can happen.  It should have been cleared by the
-        // stalling load.
+        // I don't think this can happen.  It should have been cleared
+        // by the stalling load.
          if (isStalled() &&
              storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
              panic("Is stalled should have been cleared by stalling load!\n");
@@ -860,13 +743,17 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
              stallingStoreIsn = 0;
          }
  
+        // Clear the smart pointer to make sure it is decremented.
          storeQueue[store_idx].inst->squashed = true;
          storeQueue[store_idx].inst = NULL;
          storeQueue[store_idx].canWB = 0;
  
          if (storeQueue[store_idx].req) {
+            // There should not be a completion event if the store has
+            // not yet committed.
              assert(!storeQueue[store_idx].req->completionEvent);
          }
+
          storeQueue[store_idx].req = NULL;
          --stores;
  
@@ -877,36 +764,6 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
      }
  }
  
-template <class Impl>
-void
-LSQUnit<Impl>::dumpInsts()
-{
-    cprintf("Load store queue: Dumping instructions.\n");
-    cprintf("Load queue size: %i\n", loads);
-    cprintf("Load queue: ");
-
-    int load_idx = loadHead;
-
-    while (load_idx != loadTail && loadQueue[load_idx]) {
-        cprintf("%#x ", loadQueue[load_idx]->readPC());
-
-        incrLdIdx(load_idx);
-    }
-
-    cprintf("Store queue size: %i\n", stores);
-    cprintf("Store queue: ");
-
-    int store_idx = storeHead;
-
-    while (store_idx != storeTail && storeQueue[store_idx].inst) {
-        cprintf("%#x ", storeQueue[store_idx].inst->readPC());
-
-        incrStIdx(store_idx);
-    }
-
-    cprintf("\n");
-}
-
  template <class Impl>
  void
  LSQUnit<Impl>::completeStore(int store_idx)
@@ -930,7 +787,9 @@ LSQUnit<Impl>::completeStore(int store_idx)
          iewStage->updateLSQNextCycle = true;
      }
  
-    DPRINTF(LSQUnit, "Store head idx:%i\n", storeHead);
+    DPRINTF(LSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+            "idx:%i\n",
+            storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
  
      if (isStalled() &&
          storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
@@ -943,6 +802,10 @@ LSQUnit<Impl>::completeStore(int store_idx)
      }
  
      storeQueue[store_idx].inst->setCompleted();
+
+    // Tell the checker we've completed this instruction.  Some stores
+    // may get reported twice to the checker, but the checker can
+    // handle that case.
      if (cpu->checker) {
          cpu->checker->tick(storeQueue[store_idx].inst);
      }
@@ -979,3 +842,33 @@ LSQUnit<Impl>::decrLdIdx(int &load_idx)
      if (--load_idx < 0)
          load_idx += LQEntries;
  }
+/** Prints the PCs of the instructions held in the load and store queues. */
+template <class Impl>
+void
+LSQUnit<Impl>::dumpInsts()
+{
+    cprintf("Load store queue: Dumping instructions.\n");
+    cprintf("Load queue size: %i\n", loads);
+    cprintf("Load queue: ");
+    // Walk from loadHead toward loadTail; a null entry also ends the walk.
+    int load_idx = loadHead;
+
+    while (load_idx != loadTail && loadQueue[load_idx]) {
+        cprintf("%#x ", loadQueue[load_idx]->readPC());
+
+        incrLdIdx(load_idx);
+    }
+    // NOTE(review): no newline follows the load PCs; this continues that line.
+    cprintf("Store queue size: %i\n", stores);
+    cprintf("Store queue: ");
+    // Same walk over the store queue; an entry with a null inst ends it.
+    int store_idx = storeHead;
+
+    while (store_idx != storeTail && storeQueue[store_idx].inst) {
+        cprintf("%#x ", storeQueue[store_idx].inst->readPC());
+
+        incrStIdx(store_idx);
+    }
+
+    cprintf("\n");
+}
diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh

index 141e0fdc4ddd87452e7ed37d25fb11e6095bf09a..acbe08ec2dea8a09665b74301a6ef3187fa544e2 100644 (file)
--- a/cpu/o3/mem_dep_unit.hh
+++ b/cpu/o3/mem_dep_unit.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -201,13 +201,6 @@ class MemDepUnit {
          static int memdep_erase;
      };
  
-    struct ltMemDepEntry {
-        bool operator() (const MemDepEntryPtr &lhs, const MemDepEntryPtr &rhs)
-        {
-            return lhs->inst->seqNum < rhs->inst->seqNum;
-        }
-    };
-
      /** Finds the memory dependence entry in the hash map. */
      inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);
  
diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh

index 05a33685de9afa34fc0c1cc8610caf751202b2f6..8b195baabfc4ebebc930f54fc93c0ab40cad0c01 100644 (file)
--- a/cpu/o3/mem_dep_unit_impl.hh
+++ b/cpu/o3/mem_dep_unit_impl.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -141,12 +141,12 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
          std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
      MemDepEntry::memdep_insert++;
  
-    // Add the instruction to the instruction list.
      instList[tid].push_back(inst);
  
      inst_entry->listIt = --(instList[tid].end());
  
-    // Check the dependence predictor for any producing stores.
+    // Check any barriers and the dependence predictor for any
+    // producing stores.
      InstSeqNum producing_store;
      if (inst->isLoad() && loadBarrier) {
          producing_store = loadBarrierSN;
@@ -181,7 +181,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
              moveToReady(inst_entry);
          }
      } else {
-        // Otherwise make the instruction dependent on the store.
+        // Otherwise make the instruction dependent on the store/barrier.
          DPRINTF(MemDepUnit, "Adding to dependency list; "
                  "inst PC %#x is dependent on [sn:%lli].\n",
                  inst->readPC(), producing_store);
@@ -193,8 +193,6 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
          // Add this instruction to the list of dependents.
          store_entry->dependInsts.push_back(inst_entry);
  
-//        inst_entry->producingStore = store_entry;
-
          if (inst->isLoad()) {
              ++conflictingLoads;
          } else {
@@ -370,8 +368,6 @@ MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
  
      instList[tid].erase((*hash_it).second->listIt);
  
-//    (*hash_it).second->inst = NULL;
-
      (*hash_it).second = NULL;
  
      memDepHash.erase(hash_it);
@@ -416,7 +412,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
  
          if (!woken_inst->inst) {
              // Potentially removed mem dep entries could be on this list
-//            inst_entry->dependInsts[i] = NULL;
              continue;
          }
  
@@ -429,7 +424,6 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
          } else {
              woken_inst->memDepReady = true;
          }
-//        inst_entry->dependInsts[i] = NULL;
      }
  
      inst_entry->dependInsts.clear();
@@ -468,13 +462,7 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
          assert(hash_it != memDepHash.end());
  
          (*hash_it).second->squashed = true;
-/*
-        for (int i = 0; i < (*hash_it).second->dependInsts.size(); ++i) {
-            (*hash_it).second->dependInsts[i] = NULL;
-        }
  
-        (*hash_it).second->inst = NULL;
-*/
          (*hash_it).second = NULL;
  
          memDepHash.erase(hash_it);
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh

index dd2cb0c1888376f4e963ddaaf48be39677384f29..3f1a27bb526b670468e1ef52acb63ca10604142f 100644 (file)
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -35,15 +35,16 @@
  #include "base/timebuf.hh"
  
  /**
- * DefaultRename handles both single threaded and SMT rename. Its width is
- * specified by the parameters; each cycle it tries to rename that many
- * instructions. It holds onto the rename history of all instructions with
- * destination registers, storing the arch. register, the new physical
- * register, and the old physical register, to allow for undoing of mappings
- * if squashing happens, or freeing up registers upon commit. Rename handles
- * blocking if the ROB, IQ, or LSQ is going to be full. Rename also handles
- * barriers, and does so by stalling on the instruction until the ROB is
- * empty and there are no instructions in flight to the ROB.
+ * DefaultRename handles both single threaded and SMT rename. Its
+ * width is specified by the parameters; each cycle it tries to rename
+ * that many instructions. It holds onto the rename history of all
+ * instructions with destination registers, storing the
+ * arch. register, the new physical register, and the old physical
+ * register, to allow for undoing of mappings if squashing happens, or
+ * freeing up registers upon commit. Rename handles blocking if the
+ * ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
+ * and does so by stalling on the instruction until the ROB is empty
+ * and there are no instructions in flight to the ROB.
   */
  template<class Impl>
  class DefaultRename
@@ -68,14 +69,15 @@ class DefaultRename
      // Typedefs from the ISA.
      typedef TheISA::RegIndex RegIndex;
  
-    // A deque is used to queue the instructions.  Barrier insts must be
-    // added to the front of the deque, which is the only reason for using
-    // a deque instead of a queue. (Most other stages use a queue)
+    // A list is used to queue the instructions.  Barrier insts must
+    // be added to the front of the list, which is the only reason for
+    // using a list instead of a queue. (Most other stages use a
+    // queue)
      typedef std::list<DynInstPtr> InstQueue;
  
    public:
-    /** Overall rename status. Used to determine if the CPU can deschedule
-     * itself due to a lack of activity.
+    /** Overall rename status. Used to determine if the CPU can
+     * deschedule itself due to a lack of activity.
       */
      enum RenameStatus {
          Active,
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh

index db4bb2ffe7b6bf6a93b3df3d0b92ff12fe3b5895..081581c9221ccfa51727b9b99d1cd2c304c5ae61 100644 (file)
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -209,17 +209,13 @@ template <class Impl>
  void
  DefaultRename<Impl>::initStage()
  {
+    // Grab the number of free entries directly from the stages.
      for (int tid=0; tid < numThreads; tid++) {
          freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
          freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid);
          freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
          emptyROB[tid] = true;
      }
-
-    // Clear these pointers so they are not accidentally used in
-    // non-initialization code.
-//    iew_ptr = NULL;
-//    commit_ptr = NULL;
  }
  
  template<class Impl>
@@ -299,6 +295,7 @@ DefaultRename<Impl>::takeOverFrom()
      _status = Inactive;
      initStage();
  
+    // Reset all state prior to taking over from the other CPU.
      for (int i=0; i< numThreads; i++) {
          renameStatus[i] = Idle;
  
@@ -326,7 +323,7 @@ DefaultRename<Impl>::squash(unsigned tid)
      if (renameStatus[tid] == Blocked ||
          renameStatus[tid] == Unblocking ||
          renameStatus[tid] == SerializeStall) {
-#if !FULL_SYSTEM
+#if 0
          // In syscall emulation, we can have both a block and a squash due
          // to a syscall in the same cycle.  This would cause both signals to
          // be high.  This shouldn't happen in full system.
@@ -344,7 +341,7 @@ DefaultRename<Impl>::squash(unsigned tid)
      // Set the status to Squashing.
      renameStatus[tid] = Squashing;
  
-    // Clear the skid buffer in case it has any data in it.
+    // Squash any instructions from decode.
      unsigned squashCount = 0;
  
      for (int i=0; i<fromDecode->size; i++) {
@@ -367,9 +364,6 @@ template <class Impl>
  void
  DefaultRename<Impl>::tick()
  {
-    // Rename will need to try to rename as many instructions as it
-    // has bandwidth, unless it is blocked.
-
      wroteToTimeBuffer = false;
  
      blockThisCycle = false;
@@ -454,8 +448,6 @@ DefaultRename<Impl>::rename(bool &status_change, unsigned tid)
      } else if (renameStatus[tid] == Unblocking) {
          renameInsts(tid);
  
-//        ++renameUnblockCycles;
-
          if (validInsts()) {
              // Add the current inputs to the skid buffer so they can be
              // reprocessed when this stage unblocks.
@@ -575,7 +567,6 @@ DefaultRename<Impl>::renameInsts(unsigned tid)
  
          insts_to_rename.pop_front();
  
-        //Use skidBuffer with oldest instructions
          if (renameStatus[tid] == Unblocking) {
              DPRINTF(Rename,"[tid:%u]: Removing [sn:%lli] PC:%#x from rename "
                      "skidBuffer\n",
@@ -711,10 +702,10 @@ void
  DefaultRename<Impl>::sortInsts()
  {
      int insts_from_decode = fromDecode->size;
-
+#ifdef DEBUG
      for (int i=0; i < numThreads; i++)
          assert(insts[i].empty());
-
+#endif
      for (int i = 0; i < insts_from_decode; ++i) {
          DynInstPtr inst = fromDecode->insts[i];
          insts[inst->threadNumber].push_back(inst);
@@ -794,8 +785,8 @@ DefaultRename<Impl>::block(unsigned tid)
              wroteToTimeBuffer = true;
          }
  
-        // Rename can not go from SerializeStall to Blocked, otherwise it would
-        // not know to complete the serialize stall.
+        // Rename can not go from SerializeStall to Blocked, otherwise
+        // it would not know to complete the serialize stall.
          if (renameStatus[tid] != SerializeStall) {
              // Set status to Blocked.
              renameStatus[tid] = Blocked;
@@ -835,15 +826,11 @@ DefaultRename<Impl>::doSquash(unsigned tid)
  
      InstSeqNum squashed_seq_num = fromCommit->commitInfo[tid].doneSeqNum;
  
-//#if FULL_SYSTEM
-//    assert(!historyBuffer[tid].empty());
-//#else
      // After a syscall squashes everything, the history buffer may be empty
      // but the ROB may still be squashing instructions.
      if (historyBuffer[tid].empty()) {
          return;
      }
-//#endif // FULL_SYSTEM
  
      // Go through the most recent instructions, undoing the mappings
      // they did and freeing up the registers.
@@ -896,8 +883,8 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, unsigned tid)
             hb_it != historyBuffer[tid].end() &&
             (*hb_it).instSeqNum <= inst_seq_num) {
  
-        DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, sequence"
-                " number %i.\n",
+        DPRINTF(Rename, "[tid:%u]: Freeing up older rename of reg %i, "
+                "[sn:%lli].\n",
                  tid, (*hb_it).prevPhysReg, (*hb_it).instSeqNum);
  
          freeList->addReg((*hb_it).prevPhysReg);
diff --git a/cpu/o3/rename_map.cc b/cpu/o3/rename_map.cc

index 8ba632e65551f7464f9008830afc360d340c56cc..fc59058a1b359294b238ec7d5a846a7f91eb416d 100644 (file)
--- a/cpu/o3/rename_map.cc
+++ b/cpu/o3/rename_map.cc
@@ -32,18 +32,12 @@
  
  using namespace std;
  
-// Todo: Consider making functions inline.  Avoid having things that are
-// using the zero register or misc registers from adding on the registers
-// to the free list.  Possibly remove the direct communication between
-// this and the freelist.  Considering making inline bool functions that
-// determine if the register is a logical int, logical fp, physical int,
-// physical fp, etc.
+// @todo: Consider making inline bool functions that determine if the
+// register is a logical int, logical fp, physical int, physical fp,
+// etc.
  
  SimpleRenameMap::~SimpleRenameMap()
  {
-    // Delete the rename maps as they were allocated with new.
-    //delete [] intRenameMap;
-    //delete [] floatRenameMap;
  }
  
  void
@@ -105,7 +99,8 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs,
          // Although the index refers purely to architected registers, because
          // the floating reg indices come after the integer reg indices, they
          // may exceed the size of a normal RegIndex (short).
-        for (PhysRegIndex index = numLogicalIntRegs; index < numLogicalRegs; ++index)
+        for (PhysRegIndex index = numLogicalIntRegs;
+             index < numLogicalRegs; ++index)
          {
              floatRenameMap[index].physical_reg = freg_idx++;
          }
@@ -132,14 +127,10 @@ SimpleRenameMap::init(unsigned _numLogicalIntRegs,
  void
  SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
  {
-    //Setup the interface to the freelist.
      freeList = fl_ptr;
  }
  
  
-// Don't allow this stage to fault; force that check to the rename stage.
-// Simply ask to rename a logical register and get back a new physical
-// register index.
  SimpleRenameMap::RenameInfo
  SimpleRenameMap::rename(RegIndex arch_reg)
  {
@@ -152,13 +143,11 @@ SimpleRenameMap::rename(RegIndex arch_reg)
          // requested architected register.
          prev_reg = intRenameMap[arch_reg].physical_reg;
  
-        // If it's not referencing the zero register, then mark the register
-        // as not ready.
+        // If it's not referencing the zero register, then rename the
+        // register.
          if (arch_reg != intZeroReg) {
-            // Get a free physical register to rename to.
              renamed_reg = freeList->getIntReg();
  
-            // Update the integer rename map.
              intRenameMap[arch_reg].physical_reg = renamed_reg;
  
              assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
@@ -168,20 +157,15 @@ SimpleRenameMap::rename(RegIndex arch_reg)
              renamed_reg = intZeroReg;
          }
      } else if (arch_reg < numLogicalRegs) {
-        // Subtract off the base offset for floating point registers.
-//        arch_reg = arch_reg - numLogicalIntRegs;
-
          // Record the current physical register that is renamed to the
          // requested architected register.
          prev_reg = floatRenameMap[arch_reg].physical_reg;
  
-        // If it's not referencing the zero register, then mark the register
-        // as not ready.
+        // If it's not referencing the zero register, then rename the
+        // register.
          if (arch_reg != floatZeroReg) {
-            // Get a free floating point register to rename to.
              renamed_reg = freeList->getFloatReg();
  
-            // Update the floating point rename map.
              floatRenameMap[arch_reg].physical_reg = renamed_reg;
  
              assert(renamed_reg < numPhysicalRegs &&
@@ -194,10 +178,10 @@ SimpleRenameMap::rename(RegIndex arch_reg)
          // Subtract off the base offset for miscellaneous registers.
          arch_reg = arch_reg - numLogicalRegs;
  
-        // No renaming happens to the misc. registers.  They are simply the
-        // registers that come after all the  physical registers; thus
-        // take the base architected register and add the physical registers
-        // to it.
+        // No renaming happens to the misc. registers.  They are
+        // simply the registers that come after all the physical
+        // registers; thus take the base architected register and add
+        // the physical registers to it.
          renamed_reg = arch_reg + numPhysicalRegs;
  
          // Set the previous register to the same register; mainly it must be
@@ -211,17 +195,12 @@ SimpleRenameMap::rename(RegIndex arch_reg)
      return RenameInfo(renamed_reg, prev_reg);
  }
  
-//Perhaps give this a pair as a return value, of the physical register
-//and whether or not it's ready.
  PhysRegIndex
  SimpleRenameMap::lookup(RegIndex arch_reg)
  {
      if (arch_reg < numLogicalIntRegs) {
          return intRenameMap[arch_reg].physical_reg;
      } else if (arch_reg < numLogicalRegs) {
-        // Subtract off the base FP offset.
-//        arch_reg = arch_reg - numLogicalIntRegs;
-
          return floatRenameMap[arch_reg].physical_reg;
      } else {
          // Subtract off the misc registers offset.
@@ -233,51 +212,23 @@ SimpleRenameMap::lookup(RegIndex arch_reg)
      }
  }
  
-// In this implementation the miscellaneous registers do not actually rename,
-// so this function does not allow you to try to change their mappings.
  void
  SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
  {
+    // In this implementation the miscellaneous registers do not
+    // actually rename, so this function does not allow you to try to
+    // change their mappings.
      if (arch_reg < numLogicalIntRegs) {
          DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n",
                  (int)arch_reg, renamed_reg);
  
          intRenameMap[arch_reg].physical_reg = renamed_reg;
      } else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) {
-
-
          DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
                  (int)arch_reg - numLogicalIntRegs, renamed_reg);
  
          floatRenameMap[arch_reg].physical_reg = renamed_reg;
      }
-
-    //assert(arch_reg < (numLogicalIntRegs + numLogicalFloatRegs));
-}
-
-void
-SimpleRenameMap::squash(vector<RegIndex> freed_regs,
-                        vector<UnmapInfo> unmaps)
-{
-    panic("Not sure this function should be called.");
-
-    // Not sure the rename map should be able to access the free list
-    // like this.
-    while (!freed_regs.empty()) {
-        RegIndex free_register = freed_regs.back();
-
-        if (free_register < numPhysicalIntRegs) {
-            freeList->addIntReg(free_register);
-        } else {
-            // Subtract off the base FP dependence tag.
-            free_register = free_register - numPhysicalIntRegs;
-            freeList->addFloatReg(free_register);
-        }
-
-        freed_regs.pop_back();
-    }
-
-    // Take unmap info and roll back the rename map.
  }
  
  int
diff --git a/cpu/o3/rename_map.hh b/cpu/o3/rename_map.hh

index 3ecbe45c3850eeb8dc13a92fbba26237ff1d9ca1..d7e49ae833942766ce7d007c8e7699e55eb8b73f 100644 (file)
--- a/cpu/o3/rename_map.hh
+++ b/cpu/o3/rename_map.hh
@@ -101,9 +101,6 @@ class SimpleRenameMap
       */
      void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg);
  
-    void squash(std::vector<RegIndex> freed_regs,
-                std::vector<UnmapInfo> unmaps);
-
      int numFreeEntries();
  
    private:
@@ -153,7 +150,7 @@ class SimpleRenameMap
      };
  
      //Change this to private
-  public:
+  private:
      /** Integer rename map. */
      std::vector<RenameEntry> intRenameMap;
  
diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh

index 0748850eafe120629ceec8734efd2f52ebbac212..e05eebe5a97bbf7d99cae8059815db3609c930a9 100644 (file)
--- a/cpu/o3/rob.hh
+++ b/cpu/o3/rob.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -53,9 +53,7 @@ class ROB
      enum Status {
          Running,
          Idle,
-        ROBSquashing,
-        DcacheMissStall,
-        DcacheMissComplete
+        ROBSquashing
      };
  
      /** SMT ROB Sharing Policy */
@@ -112,7 +110,7 @@ class ROB
       *  no guarantee as to the return value if the ROB is empty.
       *  @retval Pointer to the DynInst that is at the head of the ROB.
       */
-    DynInstPtr readHeadInst();
+//    DynInstPtr readHeadInst();
  
      /** Returns a pointer to the head instruction of a specific thread within
       *  the ROB.
@@ -124,7 +122,7 @@ class ROB
       *  no guarantee as to the return value if the ROB is empty.
       *  @retval Pointer to the DynInst that is at the tail of the ROB.
       */
-    DynInstPtr readTailInst();
+//    DynInstPtr readTailInst();
  
      /** Returns a pointer to the tail instruction of a specific thread within
       *  the ROB.
@@ -133,7 +131,7 @@ class ROB
      DynInstPtr readTailInst(unsigned tid);
  
      /** Retires the head instruction, removing it from the ROB. */
-    void retireHead();
+//    void retireHead();
  
      /** Retires the head instruction of a specific thread, removing it from the
       *  ROB.
@@ -141,7 +139,7 @@ class ROB
      void retireHead(unsigned tid);
  
      /** Is the oldest instruction across all threads ready. */
-    bool isHeadReady();
+//    bool isHeadReady();
  
      /** Is the oldest instruction across a particular thread ready. */
      bool isHeadReady(unsigned tid);
@@ -200,35 +198,35 @@ class ROB
      void updateTail();
  
      /** Reads the PC of the oldest head instruction. */
-    uint64_t readHeadPC();
+//    uint64_t readHeadPC();
  
      /** Reads the PC of the head instruction of a specific thread. */
-    uint64_t readHeadPC(unsigned tid);
+//    uint64_t readHeadPC(unsigned tid);
  
      /** Reads the next PC of the oldest head instruction. */
-    uint64_t readHeadNextPC();
+//    uint64_t readHeadNextPC();
  
      /** Reads the next PC of the head instruction of a specific thread. */
-    uint64_t readHeadNextPC(unsigned tid);
+//    uint64_t readHeadNextPC(unsigned tid);
  
      /** Reads the sequence number of the oldest head instruction. */
-    InstSeqNum readHeadSeqNum();
+//    InstSeqNum readHeadSeqNum();
  
      /** Reads the sequence number of the head instruction of a specific thread.
       */
-    InstSeqNum readHeadSeqNum(unsigned tid);
+//    InstSeqNum readHeadSeqNum(unsigned tid);
  
      /** Reads the PC of the youngest tail instruction. */
-    uint64_t readTailPC();
+//    uint64_t readTailPC();
  
      /** Reads the PC of the tail instruction of a specific thread. */
-    uint64_t readTailPC(unsigned tid);
+//    uint64_t readTailPC(unsigned tid);
  
      /** Reads the sequence number of the youngest tail instruction. */
-    InstSeqNum readTailSeqNum();
+//    InstSeqNum readTailSeqNum();
  
      /** Reads the sequence number of tail instruction of a specific thread. */
-    InstSeqNum readTailSeqNum(unsigned tid);
+//    InstSeqNum readTailSeqNum(unsigned tid);
  
      /** Checks if the ROB is still in the process of squashing instructions.
       *  @retval Whether or not the ROB is done squashing.
diff --git a/cpu/o3/rob_impl.hh b/cpu/o3/rob_impl.hh

index 02a4bfbee67f92e98a32122e844a6d80958813c0..25e0c80fd8743fc4e4d2e057487e9573cac087c0 100644 (file)
--- a/cpu/o3/rob_impl.hh
+++ b/cpu/o3/rob_impl.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -201,20 +201,15 @@ template <class Impl>
  void
  ROB<Impl>::insertInst(DynInstPtr &inst)
  {
-    // Make sure we have the right number of instructions.
      //assert(numInstsInROB == countInsts());
-
-    // Make sure the instruction is valid.
      assert(inst);
  
      DPRINTF(ROB, "Adding inst PC %#x to the ROB.\n", inst->readPC());
  
-    // If the ROB is full then exit.
      assert(numInstsInROB != numEntries);
  
      int tid = inst->threadNumber;
  
-    // Place into ROB
      instList[tid].push_back(inst);
  
      //Set Up head iterator if this is the 1st instruction in the ROB
@@ -228,10 +223,8 @@ ROB<Impl>::insertInst(DynInstPtr &inst)
      tail = instList[tid].end();
      tail--;
  
-    // Mark as set in ROB
      inst->setInROB();
  
-    // Increment ROB count
      ++numInstsInROB;
      ++threadEntries[tid];
  
@@ -242,6 +235,7 @@ ROB<Impl>::insertInst(DynInstPtr &inst)
  
  // Whatever calls this function needs to ensure that it properly frees up
  // registers prior to this function.
+/*
  template <class Impl>
  void
  ROB<Impl>::retireHead()
@@ -249,7 +243,6 @@ ROB<Impl>::retireHead()
      //assert(numInstsInROB == countInsts());
      assert(numInstsInROB > 0);
  
-    // Get the head ROB instruction's TID.
      int tid = (*head)->threadNumber;
  
      retireHead(tid);
@@ -258,6 +251,7 @@ ROB<Impl>::retireHead()
          tail = instList[tid].end();
      }
  }
+*/
  
  template <class Impl>
  void
@@ -271,18 +265,15 @@ ROB<Impl>::retireHead(unsigned tid)
  
      DynInstPtr head_inst = (*head_it);
  
-    // Make certain this can retire.
      assert(head_inst->readyToCommit());
  
      DPRINTF(ROB, "[tid:%u]: Retiring head instruction, "
              "instruction PC %#x,[sn:%lli]\n", tid, head_inst->readPC(),
              head_inst->seqNum);
  
-    // Keep track of how many instructions are in the ROB.
      --numInstsInROB;
      --threadEntries[tid];
  
-    //Mark DynInstFlags
      head_inst->removeInROB();
      head_inst->setCommitted();
  
@@ -291,12 +282,12 @@ ROB<Impl>::retireHead(unsigned tid)
      //Update "Global" Head of ROB
      updateHead();
  
-    // A special case is needed if the instruction being retired is the
-    // only instruction in the ROB; otherwise the tail iterator will become
-    // invalidated.
+    // @todo: A special case is needed if the instruction being
+    // retired is the only instruction in the ROB; otherwise the tail
+    // iterator will become invalidated.
      cpu->removeFrontInst(head_inst);
  }
-
+/*
  template <class Impl>
  bool
  ROB<Impl>::isHeadReady()
@@ -307,7 +298,7 @@ ROB<Impl>::isHeadReady()
  
      return false;
  }
-
+*/
  template <class Impl>
  bool
  ROB<Impl>::isHeadReady(unsigned tid)
@@ -537,7 +528,7 @@ ROB<Impl>::squash(InstSeqNum squash_num,unsigned tid)
          doSquash(tid);
      }
  }
-
+/*
  template <class Impl>
  typename Impl::DynInstPtr
  ROB<Impl>::readHeadInst()
@@ -549,7 +540,7 @@ ROB<Impl>::readHeadInst()
          return dummyInst;
      }
  }
-
+*/
  template <class Impl>
  typename Impl::DynInstPtr
  ROB<Impl>::readHeadInst(unsigned tid)
@@ -564,7 +555,7 @@ ROB<Impl>::readHeadInst(unsigned tid)
          return dummyInst;
      }
  }
-
+/*
  template <class Impl>
  uint64_t
  ROB<Impl>::readHeadPC()
@@ -608,7 +599,6 @@ ROB<Impl>::readHeadNextPC(unsigned tid)
      return (*head_thread)->readNextPC();
  }
  
-
  template <class Impl>
  InstSeqNum
  ROB<Impl>::readHeadSeqNum()
@@ -637,7 +627,7 @@ ROB<Impl>::readTailInst()
  
      return (*tail);
  }
-
+*/
  template <class Impl>
  typename Impl::DynInstPtr
  ROB<Impl>::readTailInst(unsigned tid)
@@ -650,7 +640,7 @@ ROB<Impl>::readTailInst(unsigned tid)
      return *tail_thread;
  }
  
-
+/*
  template <class Impl>
  uint64_t
  ROB<Impl>::readTailPC()
@@ -698,4 +688,4 @@ ROB<Impl>::readTailSeqNum(unsigned tid)
  
      return (*tail_thread)->seqNum;
  }
-
+*/
diff --git a/cpu/o3/scoreboard.cc b/cpu/o3/scoreboard.cc

index 87b0aee9460a1ebf567326faa1f43fc43217bdb2..b0e433620256f22ee0946001570e03b693762036 100644 (file)
--- a/cpu/o3/scoreboard.cc
+++ b/cpu/o3/scoreboard.cc
@@ -99,6 +99,7 @@ Scoreboard::unsetReg(PhysRegIndex ready_reg)
      if (ready_reg == zeroRegIdx ||
          ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
          // Don't do anything if int or fp zero reg.
+        return;
      }
  
      regScoreBoard[ready_reg] = 0;
diff --git a/cpu/o3/store_set.cc b/cpu/o3/store_set.cc

index a685646f377210c743da6cf3daf50dd22d643cfc..0c957c8c7f1fbac8d3b2da875fca7b4d94988923 100644 (file)
--- a/cpu/o3/store_set.cc
+++ b/cpu/o3/store_set.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@ -278,11 +278,6 @@ StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
  void
  StoreSet::squash(InstSeqNum squashed_num, unsigned tid)
  {
-    // Not really sure how to do this well.
-    // Generally this is small enough that it should be okay; short circuit
-    // evaluation should take care of invalid entries.
-    // Maybe keep a list of valid LFST's?  Really ugly either way...
-
      DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
              squashed_num);
  
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh

index 17719bdebbc817abc386973eed058e0fcb186725..2c9788e4bcb93e4a21675bc6e8f9736b7a322b5a 100644 (file)
--- a/cpu/o3/thread_state.hh
+++ b/cpu/o3/thread_state.hh
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
  
  #ifndef __CPU_O3_THREAD_STATE_HH__
  #define __CPU_O3_THREAD_STATE_HH__
@@ -15,27 +42,17 @@ class EndQuiesceEvent;
  class FunctionProfile;
  class ProfileNode;
  #else
-class Process;
  class FunctionalMemory;
+class Process;
  #endif
  
-// In the new CPU case this may be quite small...It depends on what I define
-// ThreadState to be.  Currently it's only the state that exists within
-// ExecContext basically.  Leaves the interface and manipulation up to the
-// CPU.  Not sure this is useful/flexible...probably can be if I can avoid
-// including state here that parts of the pipeline can't modify directly,
-// or at least don't let them.  The only problem is for state that's needed
-// per thread, per structure.  I.e. rename table, memreqs.
-// On the other hand, it might be nice to not have to pay the extra pointer
-// lookup to get frequently used state such as a memreq (that isn't used much
-// elsewhere)...
-
-// Maybe this ozone thread state should only really have committed state?
-// I need to think about why I'm using this and what it's useful for.  Clearly
-// has benefits for SMT; basically serves same use as CPUExecContext.
-// Makes the ExecContext proxy easier.  Gives organization/central access point
-// to state of a thread that can be accessed normally (i.e. not in-flight
-// stuff within a OoO processor).  Does this need an XC proxy within it?
+/**
+ * Class that has various thread state, such as the status, the
+ * current instruction being processed, whether or not the thread has
+ * a trap pending or is being externally updated, the ExecContext
+ * proxy pointer, etc.  It also handles anything related to a specific
+ * thread's process, such as syscalls and checking valid addresses.
+ */
  template <class Impl>
  struct O3ThreadState : public ThreadState {
      typedef ExecContext::Status Status;
@@ -43,7 +60,7 @@ struct O3ThreadState : public ThreadState {
  
      Status _status;
  
-    // Current instruction?
+    // Current instruction
      TheISA::MachInst inst;
    private:
      FullCPU *cpu;
@@ -80,51 +97,11 @@ struct O3ThreadState : public ThreadState {
      void setStatus(Status new_status) { _status = new_status; }
  
  #if !FULL_SYSTEM
-
-    Fault dummyTranslation(MemReqPtr &req)
-    {
-#if 0
-        assert((req->vaddr >> 48 & 0xffff) == 0);
-#endif
-
-        // put the asid in the upper 16 bits of the paddr
-        req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
-        req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
-        return NoFault;
-    }
-    Fault translateInstReq(MemReqPtr &req)
-    {
-        return dummyTranslation(req);
-    }
-    Fault translateDataReadReq(MemReqPtr &req)
-    {
-        return dummyTranslation(req);
-    }
-    Fault translateDataWriteReq(MemReqPtr &req)
-    {
-        return dummyTranslation(req);
-    }
-
      bool validInstAddr(Addr addr)
      { return process->validInstAddr(addr); }
  
      bool validDataAddr(Addr addr)
      { return process->validDataAddr(addr); }
-#else
-    Fault translateInstReq(MemReqPtr &req)
-    {
-        return cpu->itb->translate(req);
-    }
-
-    Fault translateDataReadReq(MemReqPtr &req)
-    {
-        return cpu->dtb->translate(req, false);
-    }
-
-    Fault translateDataWriteReq(MemReqPtr &req)
-    {
-        return cpu->dtb->translate(req, true);
-    }
  #endif
  
      bool misspeculating() { return false; }
author	Kevin Lim <ktlim@umich.edu>
	Fri, 19 May 2006 19:53:17 +0000 (15:53 -0400)
committer	Kevin Lim <ktlim@umich.edu>
	Fri, 19 May 2006 19:53:17 +0000 (15:53 -0400)
cpu/o3/2bit_local_pred.cc		patch \| blob \| history
cpu/o3/2bit_local_pred.hh		patch \| blob \| history
cpu/o3/alpha_cpu.hh		patch \| blob \| history
cpu/o3/bpred_unit.cc		patch \| blob \| history
cpu/o3/bpred_unit.hh		patch \| blob \| history
cpu/o3/bpred_unit_impl.hh		patch \| blob \| history
cpu/o3/comm.hh		patch \| blob \| history
cpu/o3/commit.hh		patch \| blob \| history
cpu/o3/commit_impl.hh		patch \| blob \| history
cpu/o3/decode.hh		patch \| blob \| history
cpu/o3/decode_impl.hh		patch \| blob \| history
cpu/o3/fetch.hh		patch \| blob \| history
cpu/o3/fetch_impl.hh		patch \| blob \| history
cpu/o3/lsq.hh		patch \| blob \| history
cpu/o3/lsq_impl.hh		patch \| blob \| history
cpu/o3/lsq_unit.hh		patch \| blob \| history
cpu/o3/lsq_unit_impl.hh		patch \| blob \| history
cpu/o3/mem_dep_unit.hh		patch \| blob \| history
cpu/o3/mem_dep_unit_impl.hh		patch \| blob \| history
cpu/o3/rename.hh		patch \| blob \| history
cpu/o3/rename_impl.hh		patch \| blob \| history
cpu/o3/rename_map.cc		patch \| blob \| history
cpu/o3/rename_map.hh		patch \| blob \| history
cpu/o3/rob.hh		patch \| blob \| history
cpu/o3/rob_impl.hh		patch \| blob \| history
cpu/o3/scoreboard.cc		patch \| blob \| history
cpu/o3/store_set.cc		patch \| blob \| history
cpu/o3/thread_state.hh		patch \| blob \| history