Comments and code cleanup.

author Kevin Lim <ktlim@umich.edu>

Wed, 31 May 2006 15:45:02 +0000 (11:45 -0400)

committer Kevin Lim <ktlim@umich.edu>

Wed, 31 May 2006 15:45:02 +0000 (11:45 -0400)
author Kevin Lim <ktlim@umich.edu>
Wed, 31 May 2006 15:45:02 +0000 (11:45 -0400)
committer Kevin Lim <ktlim@umich.edu>
Wed, 31 May 2006 15:45:02 +0000 (11:45 -0400)
diff --git a/cpu/activity.cc b/cpu/activity.cc

index 6dcb6e34125d1ccf82f72c7b3df7230b8fd64b9b..b0b16446cc94b5a0c7564209ea3950d41b2edeb1 100644 (file)
--- a/cpu/activity.cc
+++ b/cpu/activity.cc
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
  
  #include "base/timebuf.hh"
  #include "cpu/activity.hh"
@@ -14,6 +41,8 @@ ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency,
  void
  ActivityRecorder::activity()
  {
+    // If we've already recorded activity for this cycle, we don't
+    // want to increment the count any more.
      if (activityBuffer[0]) {
          return;
      }
@@ -28,6 +57,8 @@ ActivityRecorder::activity()
  void
  ActivityRecorder::advance()
  {
+    // If there's a 1 in the slot that is about to be erased once the
+    // time buffer advances, then decrement the activityCount.
      if (activityBuffer[-longestLatency]) {
          --activityCount;
  
@@ -46,6 +77,7 @@ ActivityRecorder::advance()
  void
  ActivityRecorder::activateStage(const int idx)
  {
+    // Increment the activity count if this stage wasn't already active.
      if (!stageActive[idx]) {
          ++activityCount;
  
@@ -62,6 +94,7 @@ ActivityRecorder::activateStage(const int idx)
  void
  ActivityRecorder::deactivateStage(const int idx)
  {
+    // Decrement the activity count if this stage was active.
      if (stageActive[idx]) {
          --activityCount;
  
diff --git a/cpu/activity.hh b/cpu/activity.hh

index 2d53dc4bbb89a4a2b18bbd8d92614128a5bc6019..2c0df5efb0797a02776c7b5023be978334c5dc96 100644 (file)
--- a/cpu/activity.hh
+++ b/cpu/activity.hh
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
  
  #ifndef __CPU_ACTIVITY_HH__
  #define __CPU_ACTIVITY_HH__
@@ -5,33 +32,61 @@
  #include "base/timebuf.hh"
  #include "base/trace.hh"
  
+/**
+ * ActivityRecorder helper class that informs the CPU if it can switch
+ * over to being idle or not.  It works by having a time buffer as
+ * long as any time buffer in the CPU, and the CPU and all of its
+ * stages inform the ActivityRecorder when they write to any time
+ * buffer.  The ActivityRecorder marks a 1 in the "0" slot of the time
+ * buffer any time a stage writes to a time buffer, and it advances
+ * its time buffer at the same time as all other stages.  The
+ * ActivityRecorder also records if a stage has activity to do next
+ * cycle.  The recorder keeps a count of these two.  Thus any time the
+ * count is non-zero, there is either communication still in flight,
+ * or activity that still must be done, meaning that the CPU can not
+ * idle.  If count is zero, then the CPU can safely idle as it has no
+ * more outstanding work to do.
+ */
  class ActivityRecorder {
    public:
      ActivityRecorder(int num_stages, int longest_latency, int count);
  
      /** Records that there is activity this cycle. */
      void activity();
-    /** Advances the activity buffer, decrementing the activityCount if active
-     *  communication just left the time buffer, and descheduling the CPU if
-     *  there is no activity.
+
+    /** Advances the activity buffer, decrementing the activityCount
+     *  if active communication just left the time buffer, and
+     *  determining if there is no activity.
       */
      void advance();
+
      /** Marks a stage as active. */
      void activateStage(const int idx);
+
      /** Deactivates a stage. */
      void deactivateStage(const int idx);
  
+    /** Returns how many things are active within the recorder. */
      int getActivityCount() { return activityCount; }
  
+    /** Sets the count to a starting value.  Can be used to disable
+     * the idling option.
+     */
      void setActivityCount(int count)
      { activityCount = count; }
  
+    /** Returns if the CPU should be active. */
      bool active() { return activityCount; }
  
+    /** Clears the time buffer and the activity count. */
      void reset();
  
+    /** Debug function to dump the contents of the time buffer. */
      void dump();
  
+    /** Debug function to ensure that the activity count matches the
+     * contents of the time buffer.
+     */
      void validate();
  
    private:
@@ -45,6 +100,7 @@ class ActivityRecorder {
       */
      TimeBuffer<bool> activityBuffer;
  
+    /** Longest latency time buffer in the CPU. */
      int longestLatency;
  
      /** Tracks how many stages and cycles of time buffer have
@@ -58,6 +114,7 @@ class ActivityRecorder {
       */
      int activityCount;
  
+    /** Number of stages that can be marked as active or inactive. */
      int numStages;
  
      /** Records which stages are active/inactive. */
diff --git a/cpu/base_dyn_inst.cc b/cpu/base_dyn_inst.cc

index 7ab760ae3ef20f53bad3753bc1b2f3d74dfc526d..64a995689c0ad65680bc1be30a4a24e11b317fdc 100644 (file)
--- a/cpu/base_dyn_inst.cc
+++ b/cpu/base_dyn_inst.cc
@@ -166,6 +166,8 @@ BaseDynInst<Impl>::~BaseDynInst()
          delete traceData;
      }
  
+    fault = NoFault;
+
      --instcount;
  
      DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. Instcount=%i\n",
@@ -289,7 +291,7 @@ BaseDynInst<Impl>::copy(Addr dest)
  {
      uint8_t data[64];
      FunctionalMemory *mem = thread->mem;
-    assert(thread->copySrcPhysAddr || thread->misspeculating());
+    assert(thread->copySrcPhysAddr);
      MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64);
      req->asid = asid;
  
diff --git a/cpu/o3/alpha_cpu.hh b/cpu/o3/alpha_cpu.hh

index 5c89e34628de2c96227ff2d65c28dece6a8ff425..4c452c4ddbba111e60443e10970ecfd9ceb593c8 100644 (file)
--- a/cpu/o3/alpha_cpu.hh
+++ b/cpu/o3/alpha_cpu.hh
@@ -39,6 +39,14 @@ namespace Kernel {
      class Statistics;
  };
  
+/**
+ * AlphaFullCPU class.  Derives from the FullO3CPU class, and
+ * implements all ISA and implementation specific functions of the
+ * CPU.  This is the CPU class that is used for the SimObjects, and is
+ * what is given to the DynInsts.  Most of its state exists in the
+ * FullO3CPU; the state it has is mainly for ISA specific
+ * functionality.
+ */
  template <class Impl>
  class AlphaFullCPU : public FullO3CPU<Impl>
  {
@@ -56,145 +64,211 @@ class AlphaFullCPU : public FullO3CPU<Impl>
      /** Constructs an AlphaFullCPU with the given parameters. */
      AlphaFullCPU(Params *params);
  
+    /**
+     * Derived ExecContext class for use with the AlphaFullCPU.  It
+     * provides the interface for any external objects to access a
+     * single thread's state and some general CPU state.  Any time
+     * external objects try to update state through this interface,
+     * the CPU will create an event to squash all in-flight
+     * instructions in order to ensure state is maintained correctly.
+     */
      class AlphaXC : public ExecContext
      {
        public:
+        /** Pointer to the CPU. */
          AlphaFullCPU<Impl> *cpu;
  
+        /** Pointer to the thread state that this XC corresponds to. */
          O3ThreadState<Impl> *thread;
  
+        /** Returns a pointer to this CPU. */
          virtual BaseCPU *getCpuPtr() { return cpu; }
  
+        /** Sets this CPU's ID. */
          virtual void setCpuId(int id) { cpu->cpu_id = id; }
  
+        /** Reads this CPU's ID. */
          virtual int readCpuId() { return cpu->cpu_id; }
  
+        /** Returns a pointer to functional memory. */
          virtual FunctionalMemory *getMemPtr() { return thread->mem; }
  
  #if FULL_SYSTEM
+        /** Returns a pointer to the system. */
          virtual System *getSystemPtr() { return cpu->system; }
  
+        /** Returns a pointer to physical memory. */
          virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
  
+        /** Returns a pointer to the ITB. */
          virtual AlphaITB *getITBPtr() { return cpu->itb; }
  
-        virtual AlphaDTB * getDTBPtr() { return cpu->dtb; }
+        /** Returns a pointer to the DTB. */
+        virtual AlphaDTB *getDTBPtr() { return cpu->dtb; }
  
+        /** Returns a pointer to this thread's kernel statistics. */
          virtual Kernel::Statistics *getKernelStats()
          { return thread->kernelStats; }
  #else
+        /** Returns a pointer to this thread's process. */
          virtual Process *getProcessPtr() { return thread->process; }
  #endif
-
+        /** Returns this thread's status. */
          virtual Status status() const { return thread->status(); }
  
+        /** Sets this thread's status. */
          virtual void setStatus(Status new_status)
          { thread->setStatus(new_status); }
  
-        /// Set the status to Active.  Optional delay indicates number of
-        /// cycles to wait before beginning execution.
+        /** Set the status to Active.  Optional delay indicates number of
+         * cycles to wait before beginning execution. */
          virtual void activate(int delay = 1);
  
-        /// Set the status to Suspended.
+        /** Set the status to Suspended. */
          virtual void suspend();
  
-        /// Set the status to Unallocated.
+        /** Set the status to Unallocated. */
          virtual void deallocate();
  
-        /// Set the status to Halted.
+        /** Set the status to Halted. */
          virtual void halt();
  
  #if FULL_SYSTEM
+        /** Dumps the function profiling information.
+         * @todo: Implement.
+         */
          virtual void dumpFuncProfile();
  #endif
-
+        /** Takes over execution of a thread from another CPU. */
          virtual void takeOverFrom(ExecContext *old_context);
  
+        /** Registers statistics associated with this XC. */
          virtual void regStats(const std::string &name);
  
+        /** Serializes state. */
          virtual void serialize(std::ostream &os);
+        /** Unserializes state. */
          virtual void unserialize(Checkpoint *cp, const std::string &section);
  
  #if FULL_SYSTEM
+        /** Returns pointer to the quiesce event. */
          virtual EndQuiesceEvent *getQuiesceEvent();
  
+        /** Reads the last tick that this thread was activated on. */
          virtual Tick readLastActivate();
+        /** Reads the last tick that this thread was suspended on. */
          virtual Tick readLastSuspend();
  
+        /** Clears the function profiling information. */
          virtual void profileClear();
+        /** Samples the function profiling information. */
          virtual void profileSample();
  #endif
-
+        /** Returns this thread's ID number. */
          virtual int getThreadNum() { return thread->tid; }
  
+        /** Returns the instruction this thread is currently committing.
+         *  Only used when an instruction faults.
+         */
          virtual TheISA::MachInst getInst();
  
+        /** Copies the architectural registers from another XC into this XC. */
          virtual void copyArchRegs(ExecContext *xc);
  
+        /** Resets all architectural registers to 0. */
          virtual void clearArchRegs();
  
+        /** Reads an integer register. */
          virtual uint64_t readIntReg(int reg_idx);
  
+        /** Reads a single precision floating point register. */
          virtual float readFloatRegSingle(int reg_idx);
  
+        /** Reads a double precision floating point register. */
          virtual double readFloatRegDouble(int reg_idx);
  
+        /** Reads a floating point register as an integer value. */
          virtual uint64_t readFloatRegInt(int reg_idx);
  
+        /** Sets an integer register to a value. */
          virtual void setIntReg(int reg_idx, uint64_t val);
  
+        /** Sets a single precision fp register to a value. */
          virtual void setFloatRegSingle(int reg_idx, float val);
  
+        /** Sets a double precision fp register to a value. */
          virtual void setFloatRegDouble(int reg_idx, double val);
  
+        /** Sets a fp register to an integer value. */
          virtual void setFloatRegInt(int reg_idx, uint64_t val);
  
+        /** Reads this thread's PC. */
          virtual uint64_t readPC()
          { return cpu->readPC(thread->tid); }
  
+        /** Sets this thread's PC. */
          virtual void setPC(uint64_t val);
  
+        /** Reads this thread's next PC. */
          virtual uint64_t readNextPC()
          { return cpu->readNextPC(thread->tid); }
  
+        /** Sets this thread's next PC. */
          virtual void setNextPC(uint64_t val);
  
+        /** Reads a miscellaneous register. */
          virtual MiscReg readMiscReg(int misc_reg)
          { return cpu->readMiscReg(misc_reg, thread->tid); }
  
+        /** Reads a misc. register, including any side-effects the
+         * read might have as defined by the architecture. */
          virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
          { return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); }
  
+        /** Sets a misc. register. */
          virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
  
+        /** Sets a misc. register, including any side-effects the
+         * write might have as defined by the architecture. */
          virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
  
+        /** Returns the number of consecutive store conditional failures. */
          // @todo: Figure out where these store cond failures should go.
          virtual unsigned readStCondFailures()
          { return thread->storeCondFailures; }
  
+        /** Sets the number of consecutive store conditional failures. */
          virtual void setStCondFailures(unsigned sc_failures)
          { thread->storeCondFailures = sc_failures; }
  
  #if FULL_SYSTEM
+        /** Returns if the thread is currently in PAL mode, based on
+         * the PC's value. */
          virtual bool inPalMode()
          { return TheISA::PcPAL(cpu->readPC(thread->tid)); }
  #endif
-
          // Only really makes sense for old CPU model.  Lots of code
          // outside the CPU still checks this function, so it will
          // always return false to keep everything working.
+        /** Checks if the thread is misspeculating.  Because it is
+         * very difficult to determine if the thread is
+         * misspeculating, this is set as false. */
          virtual bool misspeculating() { return false; }
  
  #if !FULL_SYSTEM
+        /** Gets a syscall argument by index. */
          virtual IntReg getSyscallArg(int i);
  
+        /** Sets a syscall argument. */
          virtual void setSyscallArg(int i, IntReg val);
  
+        /** Sets the syscall return value. */
          virtual void setSyscallReturn(SyscallReturn return_value);
  
+        /** Executes a syscall in SE mode. */
          virtual void syscall() { return cpu->syscall(thread->tid); }
  
+        /** Reads the funcExeInst counter. */
          virtual Counter readFuncExeInst() { return thread->funcExeInst; }
  #endif
      };
@@ -260,19 +334,32 @@ class AlphaFullCPU : public FullO3CPU<Impl>
      }
  
  #endif
+    /** Reads a miscellaneous register. */
      MiscReg readMiscReg(int misc_reg, unsigned tid);
  
+    /** Reads a misc. register, including any side effects the read
+     * might have as defined by the architecture.
+     */
      MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
  
+    /** Sets a miscellaneous register. */
      Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
  
+    /** Sets a misc. register, including any side effects the write
+     * might have as defined by the architecture.
+     */
      Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
  
+    /** Initiates a squash of all in-flight instructions for a given
+     * thread.  The source of the squash is an external update of
+     * state through the XC.
+     */
      void squashFromXC(unsigned tid);
  
  #if FULL_SYSTEM
+    /** Posts an interrupt. */
      void post_interrupt(int int_num, int index);
-
+    /** Reads the interrupt flag. */
      int readIntrFlag();
      /** Sets the interrupt flags. */
      void setIntrFlag(int val);
@@ -298,7 +385,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
      /** Executes a syscall.
       * @todo: Determine if this needs to be virtual.
       */
-    void syscall(int thread_num);
+    void syscall(int tid);
      /** Gets a syscall argument. */
      IntReg getSyscallArg(int i, int tid);
  
@@ -424,6 +511,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
  
      Addr lockAddr;
  
+    /** Temporary fix for the lock flag, works in the UP case. */
      bool lockFlag;
  };
  
diff --git a/cpu/o3/alpha_cpu_impl.hh b/cpu/o3/alpha_cpu_impl.hh

index 91cd3d9e679697c969e4b947307868db33447c9f..f39fdf6b65c2bff69d42b68b2467a4cc4f7016b3 100644 (file)
--- a/cpu/o3/alpha_cpu_impl.hh
+++ b/cpu/o3/alpha_cpu_impl.hh
@@ -59,10 +59,12 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
  {
      DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
  
+    // Setup any thread state.
      this->thread.resize(this->numThreads);
  
      for (int i = 0; i < this->numThreads; ++i) {
  #if FULL_SYSTEM
+        // SMT is not supported in FS mode yet.
          assert(this->numThreads == 1);
          this->thread[i] = new Thread(this, 0, params->mem);
          this->thread[i]->setStatus(ExecContext::Suspended);
@@ -87,29 +89,34 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
          }
  #endif // !FULL_SYSTEM
  
-        this->thread[i]->numInst = 0;
-
          ExecContext *xc_proxy;
  
-        AlphaXC *alpha_xc_proxy = new AlphaXC;
+        // Setup the XC that will serve as the interface to the threads/CPU.
+        AlphaXC *alpha_xc = new AlphaXC;
  
+        // If we're using a checker, then the XC should be the
+        // CheckerExecContext.
          if (params->checker) {
-            xc_proxy = new CheckerExecContext<AlphaXC>(alpha_xc_proxy, this->checker);
+            xc_proxy = new CheckerExecContext<AlphaXC>(
+                alpha_xc, this->checker);
          } else {
-            xc_proxy = alpha_xc_proxy;
+            xc_proxy = alpha_xc;
          }
  
-        alpha_xc_proxy->cpu = this;
-        alpha_xc_proxy->thread = this->thread[i];
+        alpha_xc->cpu = this;
+        alpha_xc->thread = this->thread[i];
  
  #if FULL_SYSTEM
+        // Setup quiesce event.
          this->thread[i]->quiesceEvent =
              new EndQuiesceEvent(xc_proxy);
          this->thread[i]->lastActivate = 0;
          this->thread[i]->lastSuspend = 0;
  #endif
+        // Give the thread the XC.
          this->thread[i]->xcProxy = xc_proxy;
  
+        // Add the XC to the CPU's list of XC's.
          this->execContexts.push_back(xc_proxy);
      }
  
@@ -171,6 +178,7 @@ AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
      setStatus(old_context->status());
      copyArchRegs(old_context);
      setCpuId(old_context->readCpuId());
+
  #if !FULL_SYSTEM
      thread->funcExeInst = old_context->readFuncExeInst();
  #else
@@ -394,7 +402,6 @@ template <class Impl>
  uint64_t
  AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
  {
-    DPRINTF(Fault, "Reading int register through the XC!\n");
      return cpu->readArchIntReg(reg_idx, thread->tid);
  }
  
@@ -402,7 +409,6 @@ template <class Impl>
  float
  AlphaFullCPU<Impl>::AlphaXC::readFloatRegSingle(int reg_idx)
  {
-    DPRINTF(Fault, "Reading float register through the XC!\n");
      return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
  }
  
@@ -410,7 +416,6 @@ template <class Impl>
  double
  AlphaFullCPU<Impl>::AlphaXC::readFloatRegDouble(int reg_idx)
  {
-    DPRINTF(Fault, "Reading float register through the XC!\n");
      return cpu->readArchFloatRegDouble(reg_idx, thread->tid);
  }
  
@@ -418,7 +423,6 @@ template <class Impl>
  uint64_t
  AlphaFullCPU<Impl>::AlphaXC::readFloatRegInt(int reg_idx)
  {
-    DPRINTF(Fault, "Reading floatint register through the XC!\n");
      return cpu->readArchFloatRegInt(reg_idx, thread->tid);
  }
  
@@ -426,9 +430,9 @@ template <class Impl>
  void
  AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
  {
-    DPRINTF(Fault, "Setting int register through the XC!\n");
      cpu->setArchIntReg(reg_idx, val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -438,9 +442,9 @@ template <class Impl>
  void
  AlphaFullCPU<Impl>::AlphaXC::setFloatRegSingle(int reg_idx, float val)
  {
-    DPRINTF(Fault, "Setting float register through the XC!\n");
      cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -450,9 +454,9 @@ template <class Impl>
  void
  AlphaFullCPU<Impl>::AlphaXC::setFloatRegDouble(int reg_idx, double val)
  {
-    DPRINTF(Fault, "Setting float register through the XC!\n");
      cpu->setArchFloatRegDouble(reg_idx, val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -462,9 +466,9 @@ template <class Impl>
  void
  AlphaFullCPU<Impl>::AlphaXC::setFloatRegInt(int reg_idx, uint64_t val)
  {
-    DPRINTF(Fault, "Setting floatint register through the XC!\n");
      cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -476,6 +480,7 @@ AlphaFullCPU<Impl>::AlphaXC::setPC(uint64_t val)
  {
      cpu->setPC(val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -487,6 +492,7 @@ AlphaFullCPU<Impl>::AlphaXC::setNextPC(uint64_t val)
  {
      cpu->setNextPC(val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -496,10 +502,9 @@ template <class Impl>
  Fault
  AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
  {
-    DPRINTF(Fault, "Setting misc register through the XC!\n");
-
      Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -509,12 +514,12 @@ AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
  
  template <class Impl>
  Fault
-AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
+AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg,
+                                                  const MiscReg &val)
  {
-    DPRINTF(Fault, "Setting misc register through the XC!\n");
-
      Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid);
  
+    // Squash if we're not already in a state update mode.
      if (!thread->trapPending && !thread->inSyscall) {
          cpu->squashFromXC(thread->tid);
      }
@@ -595,7 +600,6 @@ AlphaFullCPU<Impl>::post_interrupt(int int_num, int index)
  
      if (this->thread[0]->status() == ExecContext::Suspended) {
          DPRINTF(IPI,"Suspended Processor awoke\n");
-//     xcProxies[0]->activate();
          this->execContexts[0]->activate();
      }
  }
@@ -658,6 +662,7 @@ template <class Impl>
  void
  AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
  {
+    // Pass the thread's XC into the invoke method.
      fault->invoke(this->execContexts[tid]);
  }
  
@@ -708,6 +713,7 @@ AlphaFullCPU<Impl>::processInterrupts()
      if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
          this->setMiscReg(IPR_ISR, summary, 0);
          this->setMiscReg(IPR_INTID, ipl, 0);
+        // Checker needs to know these two registers were updated.
          if (this->checker) {
              this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
              this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
diff --git a/cpu/o3/alpha_dyn_inst.hh b/cpu/o3/alpha_dyn_inst.hh

index 1c5b738aae580ccb10979f9d9477e895ff08b0e5..de4d403581385500300809a9445868bf88a5d109 100644 (file)
--- a/cpu/o3/alpha_dyn_inst.hh
+++ b/cpu/o3/alpha_dyn_inst.hh
@@ -86,23 +86,31 @@ class AlphaDynInst : public BaseDynInst<Impl>
      void initVars();
  
    public:
+    /** Reads a miscellaneous register. */
      MiscReg readMiscReg(int misc_reg)
      {
          return this->cpu->readMiscReg(misc_reg, this->threadNumber);
      }
  
+    /** Reads a misc. register, including any side-effects the read
+     * might have as defined by the architecture.
+     */
      MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
      {
          return this->cpu->readMiscRegWithEffect(misc_reg, fault,
                                                  this->threadNumber);
      }
  
+    /** Sets a misc. register. */
      Fault setMiscReg(int misc_reg, const MiscReg &val)
      {
          this->instResult.integer = val;
          return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
      }
  
+    /** Sets a misc. register, including any side-effects the write
+     * might have as defined by the architecture.
+     */
      Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
      {
          return this->cpu->setMiscRegWithEffect(misc_reg, val,
diff --git a/cpu/o3/alpha_dyn_inst_impl.hh b/cpu/o3/alpha_dyn_inst_impl.hh

index 541d5ab82cef7577f1378f936a87f1be1ae498ef..d82d46830f30b1c78a6c5288eeae791b94708ad8 100644 (file)
--- a/cpu/o3/alpha_dyn_inst_impl.hh
+++ b/cpu/o3/alpha_dyn_inst_impl.hh
@@ -64,9 +64,10 @@ template <class Impl>
  Fault
  AlphaDynInst<Impl>::execute()
  {
-    // @todo: Pretty convoluted way to avoid squashing from happening when using
-    // the XC during an instruction's execution (specifically for instructions
-    // that have sideeffects that use the XC).  Fix this.
+    // @todo: Pretty convoluted way to avoid squashing from happening
+    // when using the XC during an instruction's execution
+    // (specifically for instructions that have side-effects that use
+    // the XC).  Fix this.
      bool in_syscall = this->thread->inSyscall;
      this->thread->inSyscall = true;
  
@@ -81,9 +82,10 @@ template <class Impl>
  Fault
  AlphaDynInst<Impl>::initiateAcc()
  {
-    // @todo: Pretty convoluted way to avoid squashing from happening when using
-    // the XC during an instruction's execution (specifically for instructions
-    // that have sideeffects that use the XC).  Fix this.
+    // @todo: Pretty convoluted way to avoid squashing from happening
+    // when using the XC during an instruction's execution
+    // (specifically for instructions that have side-effects that use
+    // the XC).  Fix this.
      bool in_syscall = this->thread->inSyscall;
      this->thread->inSyscall = true;
  
@@ -99,10 +101,12 @@ Fault
  AlphaDynInst<Impl>::completeAcc()
  {
      if (this->isLoad()) {
+        // Loads need the request's data to complete the access.
          this->fault = this->staticInst->completeAcc(this->req->data,
                                                      this,
                                                      this->traceData);
      } else if (this->isStore()) {
+        // Stores need the result of the request to complete their access.
          this->fault = this->staticInst->completeAcc((uint8_t*)&this->req->result,
                                                      this,
                                                      this->traceData);
@@ -118,9 +122,11 @@ template <class Impl>
  Fault
  AlphaDynInst<Impl>::hwrei()
  {
+    // Can only do a hwrei when in pal mode.
      if (!this->cpu->inPalMode(this->readPC()))
          return new AlphaISA::UnimplementedOpcodeFault;
  
+    // Set the next PC based on the value of the EXC_ADDR IPR.
      this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR,
                                             this->threadNumber));
  
diff --git a/cpu/o3/alpha_params.hh b/cpu/o3/alpha_params.hh

index 5eb00426d553c83e0b17113719e17515fe2e7694..f0836a9fd00a4f56a81e5bba992a9869305ee115 100644 (file)
--- a/cpu/o3/alpha_params.hh
+++ b/cpu/o3/alpha_params.hh
@@ -125,7 +125,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params
      Tick fetchTrapLatency;
  
      //
-    // Branch predictor (BP & BTB)
+    // Branch predictor (BP, BTB, RAS)
      //
      std::string predType;
      unsigned localPredictorSize;
diff --git a/cpu/o3/comm.hh b/cpu/o3/comm.hh

index c36c58d3d04bef8e17d661f8f2475d59dd979cb3..d9a242a128725ea577afc9e96c18d7f1d97765aa 100644 (file)
--- a/cpu/o3/comm.hh
+++ b/cpu/o3/comm.hh
@@ -41,6 +41,7 @@
  // typedef yet are not templated on the Impl. For now it will be defined here.
  typedef short int PhysRegIndex;
  
+/** Struct that defines the information passed from fetch to decode. */
  template<class Impl>
  struct DefaultFetchDefaultDecode {
      typedef typename Impl::DynInstPtr DynInstPtr;
@@ -53,6 +54,7 @@ struct DefaultFetchDefaultDecode {
      bool clearFetchFault;
  };
  
+/** Struct that defines the information passed from decode to rename. */
  template<class Impl>
  struct DefaultDecodeDefaultRename {
      typedef typename Impl::DynInstPtr DynInstPtr;
@@ -62,6 +64,7 @@ struct DefaultDecodeDefaultRename {
      DynInstPtr insts[Impl::MaxWidth];
  };
  
+/** Struct that defines the information passed from rename to IEW. */
  template<class Impl>
  struct DefaultRenameDefaultIEW {
      typedef typename Impl::DynInstPtr DynInstPtr;
@@ -71,6 +74,7 @@ struct DefaultRenameDefaultIEW {
      DynInstPtr insts[Impl::MaxWidth];
  };
  
+/** Struct that defines the information passed from IEW to commit. */
  template<class Impl>
  struct DefaultIEWDefaultCommit {
      typedef typename Impl::DynInstPtr DynInstPtr;
@@ -98,6 +102,7 @@ struct IssueStruct {
      DynInstPtr insts[Impl::MaxWidth];
  };
  
+/** Struct that defines all backwards communication. */
  template<class Impl>
  struct TimeBufStruct {
      struct decodeComm {
@@ -119,13 +124,7 @@ struct TimeBufStruct {
  
      decodeComm decodeInfo[Impl::MaxThreads];
  
-    // Rename can't actually tell anything to squash or send a new PC back
-    // because it doesn't do anything along those lines.  But maybe leave
-    // these fields in here to keep the stages mostly orthagonal.
      struct renameComm {
-        bool squash;
-
-        uint64_t nextPC;
      };
  
      renameComm renameInfo[Impl::MaxThreads];
diff --git a/cpu/o3/commit.hh b/cpu/o3/commit.hh

index 66abf8dc6b3e3c3ddf482098283c53d8cb2df0c6..d9382239463c7deff37e0fe7c87ec0c2eeb34864 100644 (file)
--- a/cpu/o3/commit.hh
+++ b/cpu/o3/commit.hh
@@ -84,6 +84,9 @@ class DefaultCommit
  
      typedef O3ThreadState<Impl> Thread;
  
+    /** Event class used to schedule a squash due to a trap (fault or
+     * interrupt) to happen on a specific cycle.
+     */
      class TrapEvent : public Event {
        private:
          DefaultCommit<Impl> *commit;
@@ -161,7 +164,7 @@ class DefaultCommit
  
      Fetch *fetchStage;
  
-    /** Sets the poitner to the IEW stage. */
+    /** Sets the pointer to the IEW stage. */
      void setIEWStage(IEW *iew_stage);
  
      /** The pointer to the IEW stage. Used solely to ensure that
@@ -182,10 +185,13 @@ class DefaultCommit
      /** Initializes stage by sending back the number of free entries. */
      void initStage();
  
+    /** Initiates the switching out of commit. */
      void switchOut();
  
+    /** Completes the switch out of commit. */
      void doSwitchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  
      /** Ticks the commit stage, which tries to commit instructions. */
@@ -199,11 +205,18 @@ class DefaultCommit
      /** Returns the number of free ROB entries for a specific thread. */
      unsigned numROBFreeEntries(unsigned tid);
  
+    /** Generates an event to schedule a squash due to a trap. */
+    void generateTrapEvent(unsigned tid);
+
+    /** Records that commit needs to initiate a squash due to an
+     * external state update through the XC.
+     */
      void generateXCEvent(unsigned tid);
  
    private:
      /** Updates the overall status of commit with the nextStatus, and
-     * tell the CPU if commit is active/inactive. */
+     * tell the CPU if commit is active/inactive.
+     */
      void updateStatus();
  
      /** Sets the next status based on threads' statuses, which becomes the
@@ -222,10 +235,13 @@ class DefaultCommit
       */
      bool changedROBEntries();
  
+    /** Squashes all in flight instructions. */
      void squashAll(unsigned tid);
  
+    /** Handles squashing due to a trap. */
      void squashFromTrap(unsigned tid);
  
+    /** Handles squashing due to an XC write. */
      void squashFromXC(unsigned tid);
  
      /** Commits as many instructions as possible. */
@@ -236,8 +252,6 @@ class DefaultCommit
       */
      bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
  
-    void generateTrapEvent(unsigned tid);
-
      /** Gets instructions from rename and inserts them into the ROB. */
      void getInsts();
  
@@ -259,12 +273,16 @@ class DefaultCommit
       */
      uint64_t readPC() { return PC[0]; }
  
+    /** Returns the PC of a specific thread. */
      uint64_t readPC(unsigned tid) { return PC[tid]; }
  
+    /** Sets the PC of a specific thread. */
      void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
  
+    /** Returns the next PC of a specific thread. */
      uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
  
+    /** Sets the next PC of a specific thread. */
      void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
  
    private:
@@ -304,6 +322,7 @@ class DefaultCommit
      /** Memory interface.  Used for d-cache accesses. */
      MemInterface *dcacheInterface;
  
+    /** Vector of all of the threads. */
      std::vector<Thread *> thread;
  
      Fault fetchFault;
@@ -362,17 +381,27 @@ class DefaultCommit
      /** Number of Active Threads */
      unsigned numThreads;
  
+    /** Is a switch out pending. */
      bool switchPending;
+
+    /** Is commit switched out. */
      bool switchedOut;
  
+    /** The latency to handle a trap.  Used when scheduling trap
+     * squash event.
+     */
      Tick trapLatency;
  
      Tick fetchTrapLatency;
  
      Tick fetchFaultTick;
  
+    /** The commit PC of each thread.  Refers to the instruction that
+     * is currently being processed/committed.
+     */
      Addr PC[Impl::MaxThreads];
  
+    /** The next PC of each thread. */
      Addr nextPC[Impl::MaxThreads];
  
      /** The sequence number of the youngest valid instruction in the ROB. */
@@ -384,6 +413,7 @@ class DefaultCommit
      /** Rename map interface. */
      RenameMap *renameMap[Impl::MaxThreads];
  
+    /** Updates commit stats based on this instruction. */
      void updateComInstStats(DynInstPtr &inst);
  
      /** Stat for the total number of committed instructions. */
@@ -417,7 +447,9 @@ class DefaultCommit
      /** Total number of committed branches. */
      Stats::Vector<> statComBranches;
  
+    /** Number of cycles where the commit bandwidth limit is reached. */
      Stats::Scalar<> commitEligibleSamples;
+    /** Number of instructions not committed due to bandwidth limits. */
      Stats::Vector<> commitEligible;
  };
  
diff --git a/cpu/o3/commit_impl.hh b/cpu/o3/commit_impl.hh

index 346a8bc1c3bac146a1020a7409f7bd227ea10df8..9409697eb13c34baa8c5c77230577dda26387d13 100644 (file)
--- a/cpu/o3/commit_impl.hh
+++ b/cpu/o3/commit_impl.hh
@@ -691,7 +691,7 @@ DefaultCommit<Impl>::commit()
  
      while (threads != (*activeThreads).end()) {
          unsigned tid = *threads++;
-
+/*
          if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
              // Record the fault.  Wait until it's empty in the ROB.
              // Then handle the trap.  Ignore it if there's already a
@@ -713,7 +713,7 @@ DefaultCommit<Impl>::commit()
                  commitStatus[0] = Running;
              }
          }
-
+*/
          // Not sure which one takes priority.  I think if we have
          // both, that's a bad sign.
          if (trapSquash[tid] == true) {
@@ -925,7 +925,7 @@ DefaultCommit<Impl>::commitInsts()
      numCommittedDist.sample(num_committed);
  
      if (num_committed == commitWidth) {
-        commitEligible[0]++;
+        commitEligibleSamples[0]++;
      }
  }
  
@@ -947,6 +947,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
          head_inst->reachedCommit = true;
  
          if (head_inst->isNonSpeculative() ||
+            head_inst->isStoreConditional() ||
              head_inst->isMemBarrier() ||
              head_inst->isWriteBarrier()) {
  
diff --git a/cpu/o3/cpu.hh b/cpu/o3/cpu.hh

index 8db65d50160483fab4c2f3b3f27c16d27392d50d..f4b19bfb3f20433743ad6e6156affc4d8c63b92f 100644 (file)
--- a/cpu/o3/cpu.hh
+++ b/cpu/o3/cpu.hh
@@ -67,6 +67,11 @@ class BaseFullCPU : public BaseCPU
      int cpu_id;
  };
  
+/**
+ * FullO3CPU class, has each of the stages (fetch through commit)
+ * within it, as well as all of the time buffers between stages.  The
+ * tick() function for the CPU is defined here.
+ */
  template <class Impl>
  class FullO3CPU : public BaseFullCPU
  {
@@ -194,17 +199,13 @@ class FullO3CPU : public BaseFullCPU
       */
      virtual void syscall(int tid) { panic("Unimplemented!"); }
  
-    /** Check if there are any system calls pending. */
-    void checkSyscalls();
-
-    /** Switches out this CPU.
-     */
+    /** Switches out this CPU. */
      void switchOut(Sampler *sampler);
  
+    /** Signals to this CPU that a stage has completed switching out. */
      void signalSwitched();
  
-    /** Takes over from another CPU.
-     */
+    /** Takes over from another CPU. */
      void takeOverFrom(BaseCPU *oldCPU);
  
      /** Get the current instruction sequence number, and increment it. */
@@ -244,9 +245,7 @@ class FullO3CPU : public BaseFullCPU
  
  #endif
  
-    //
-    // New accessors for new decoder.
-    //
+    /** Register accessors.  Index refers to the physical register index. */
      uint64_t readIntReg(int reg_idx);
  
      float readFloatRegSingle(int reg_idx);
@@ -271,6 +270,11 @@ class FullO3CPU : public BaseFullCPU
  
      uint64_t readArchFloatRegInt(int reg_idx, unsigned tid);
  
+    /** Architectural register accessors.  Looks up in the commit
+     * rename table to obtain the true physical index of the
+     * architected register first, then accesses that physical
+     * register.
+     */
      void setArchIntReg(int reg_idx, uint64_t val, unsigned tid);
  
      void setArchFloatRegSingle(int reg_idx, float val, unsigned tid);
@@ -279,13 +283,17 @@ class FullO3CPU : public BaseFullCPU
  
      void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
  
+    /** Reads the commit PC of a specific thread. */
      uint64_t readPC(unsigned tid);
  
-    void setPC(Addr new_PC,unsigned tid);
+    /** Sets the commit PC of a specific thread. */
+    void setPC(Addr new_PC, unsigned tid);
  
+    /** Reads the next PC of a specific thread. */
      uint64_t readNextPC(unsigned tid);
  
-    void setNextPC(uint64_t val,unsigned tid);
+    /** Sets the next PC of a specific thread. */
+    void setNextPC(uint64_t val, unsigned tid);
  
      /** Function to add instruction onto the head of the list of the
       *  instructions.  Used when new instructions are fetched.
@@ -309,21 +317,15 @@ class FullO3CPU : public BaseFullCPU
      /** Remove all instructions younger than the given sequence number. */
      void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
  
+    /** Removes the instruction pointed to by the iterator. */
      inline void squashInstIt(const ListIt &instIt, const unsigned &tid);
  
+    /** Cleans up all instructions on the remove list. */
      void cleanUpRemovedInsts();
  
-    /** Remove all instructions from the list. */
-//    void removeAllInsts();
-
+    /** Debug function to print all instructions on the list. */
      void dumpInsts();
  
-    /** Basically a wrapper function so that instructions executed at
-     *  commit can tell the instruction queue that they have
-     *  completed.  Eventually this hack should be removed.
-     */
-//    void wakeDependents(DynInstPtr &inst);
-
    public:
      /** List of all the instructions in flight. */
      std::list<DynInstPtr> instList;
@@ -334,6 +336,9 @@ class FullO3CPU : public BaseFullCPU
      std::queue<ListIt> removeList;
  
  #ifdef DEBUG
+    /** Debug structure to keep track of the sequence numbers still in
+     * flight.
+     */
      std::set<InstSeqNum> snList;
  #endif
  
@@ -420,14 +425,22 @@ class FullO3CPU : public BaseFullCPU
      /** The IEW stage's instruction queue. */
      TimeBuffer<IEWStruct> iewQueue;
  
-  public:
+  private:
+    /** The activity recorder; used to tell if the CPU has any
+     * activity remaining or if it can go to idle and deschedule
+     * itself.
+     */
      ActivityRecorder activityRec;
  
+  public:
+    /** Records that there was time buffer activity this cycle. */
      void activityThisCycle() { activityRec.activity(); }
  
+    /** Changes a stage's status to active within the activity recorder. */
      void activateStage(const StageIdx idx)
      { activityRec.activateStage(idx); }
  
+    /** Changes a stage's status to inactive within the activity recorder. */
      void deactivateStage(const StageIdx idx)
      { activityRec.deactivateStage(idx); }
  
@@ -438,7 +451,7 @@ class FullO3CPU : public BaseFullCPU
      int getFreeTid();
  
    public:
-    /** Temporary function to get pointer to exec context. */
+    /** Returns a pointer to a thread's exec context. */
      ExecContext *xcBase(unsigned tid)
      {
          return thread[tid]->getXCProxy();
@@ -447,6 +460,10 @@ class FullO3CPU : public BaseFullCPU
      /** The global sequence number counter. */
      InstSeqNum globalSeqNum;
  
+    /** Pointer to the checker, which can dynamically verify
+     * instruction results at run time.  This can be set to NULL if it
+     * is not being used.
+     */
      Checker<DynInstPtr> *checker;
  
  #if FULL_SYSTEM
@@ -462,11 +479,13 @@ class FullO3CPU : public BaseFullCPU
      /** Pointer to memory. */
      FunctionalMemory *mem;
  
+    /** Pointer to the sampler. */
      Sampler *sampler;
  
+    /** Counter of how many stages have completed switching out. */
      int switchCount;
  
-    // List of all ExecContexts.
+    /** Pointers to all of the threads in the CPU. */
      std::vector<Thread *> thread;
  
  #if 0
diff --git a/cpu/o3/cpu_policy.hh b/cpu/o3/cpu_policy.hh

index b4249b12deb9bb8dd7897d96a458210372982aa7..c30e58389bcce6895fca028f9994a08c149f72dc 100644 (file)
--- a/cpu/o3/cpu_policy.hh
+++ b/cpu/o3/cpu_policy.hh
@@ -48,24 +48,50 @@
  
  #include "cpu/o3/comm.hh"
  
+/**
+ * Struct that defines the key classes to be used by the CPU.  All
+ * classes use the typedefs defined here to determine what are the
+ * classes of the other stages and communication buffers.  In order to
+ * change a structure such as the IQ, simply change the typedef here
+ * to use the desired class instead, and recompile.  In order to
+ * create a different CPU to be used simultaneously with this one, see
+ * the alpha_impl.hh file for instructions.
+ */
  template<class Impl>
  struct SimpleCPUPolicy
  {
+    /** Typedef for the branch prediction unit (which includes the BP,
+     * RAS, and BTB).
+     */
      typedef BPredUnit<Impl> BPredUnit;
+    /** Typedef for the register file.  Most classes assume a unified
+     * physical register file.
+     */
      typedef PhysRegFile<Impl> RegFile;
+    /** Typedef for the freelist of registers. */
      typedef SimpleFreeList FreeList;
+    /** Typedef for the rename map. */
      typedef SimpleRenameMap RenameMap;
+    /** Typedef for the ROB. */
      typedef ROB<Impl> ROB;
+    /** Typedef for the instruction queue/scheduler. */
      typedef InstructionQueue<Impl> IQ;
+    /** Typedef for the memory dependence unit. */
      typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
+    /** Typedef for the LSQ. */
      typedef LSQ<Impl> LSQ;
+    /** Typedef for the thread-specific LSQ units. */
      typedef LSQUnit<Impl> LSQUnit;
  
-
+    /** Typedef for fetch. */
      typedef DefaultFetch<Impl> Fetch;
+    /** Typedef for decode. */
      typedef DefaultDecode<Impl> Decode;
+    /** Typedef for rename. */
      typedef DefaultRename<Impl> Rename;
+    /** Typedef for Issue/Execute/Writeback. */
      typedef DefaultIEW<Impl> IEW;
+    /** Typedef for commit. */
      typedef DefaultCommit<Impl> Commit;
  
      /** The struct for communication between fetch and decode. */
diff --git a/cpu/o3/decode.hh b/cpu/o3/decode.hh

index 3035b3387b52ba6b6413fab0b61952641f40c014..b336575a81980d49933b7dde92114fe4fb9e7a7d 100644 (file)
--- a/cpu/o3/decode.hh
+++ b/cpu/o3/decode.hh
@@ -107,9 +107,12 @@ class DefaultDecode
      /** Sets pointer to list of active threads. */
      void setActiveThreads(std::list<unsigned> *at_ptr);
  
+    /** Switches out the decode stage. */
      void switchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
+
      /** Ticks decode, processing all input signals and decoding as many
       * instructions as possible.
       */
diff --git a/cpu/o3/decode_impl.hh b/cpu/o3/decode_impl.hh

index 8d84d46c8e91a7e1007a6f3cb664265cf126f69f..0b686375e13ba4f16f07e3070a34772ca787996e 100644 (file)
--- a/cpu/o3/decode_impl.hh
+++ b/cpu/o3/decode_impl.hh
@@ -41,6 +41,7 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
  {
      _status = Inactive;
  
+    // Setup status, make sure stall signals are clear.
      for (int i = 0; i < numThreads; ++i) {
          decodeStatus[i] = Idle;
  
@@ -165,6 +166,7 @@ template <class Impl>
  void
  DefaultDecode<Impl>::switchOut()
  {
+    // Decode can immediately switch out.
      cpu->signalSwitched();
  }
  
@@ -174,6 +176,7 @@ DefaultDecode<Impl>::takeOverFrom()
  {
      _status = Inactive;
  
+    // Be sure to reset state and clear out any old instructions.
      for (int i = 0; i < numThreads; ++i) {
          decodeStatus[i] = Idle;
  
@@ -222,22 +225,22 @@ DefaultDecode<Impl>::block(unsigned tid)
  {
      DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
  
-    // If the decode status is blocked or unblocking then decode has not yet
-    // signalled fetch to unblock. In that case, there is no need to tell
-    // fetch to block.
-    if (decodeStatus[tid] != Blocked &&
-        decodeStatus[tid] != Unblocking) {
-        toFetch->decodeBlock[tid] = true;
-        wroteToTimeBuffer = true;
-    }
-
      // Add the current inputs to the skid buffer so they can be
      // reprocessed when this stage unblocks.
      skidInsert(tid);
  
+    // If the decode status is blocked or unblocking then decode has not yet
+    // signalled fetch to unblock. In that case, there is no need to tell
+    // fetch to block.
      if (decodeStatus[tid] != Blocked) {
          // Set the status to Blocked.
          decodeStatus[tid] = Blocked;
+
+        if (decodeStatus[tid] != Unblocking) {
+            toFetch->decodeBlock[tid] = true;
+            wroteToTimeBuffer = true;
+        }
+
          return true;
      }
  
@@ -270,13 +273,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
      DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
              "detected at decode.\n", tid);
  
+    // Send back mispredict information.
      toFetch->decodeInfo[tid].branchMispredict = true;
      toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
      toFetch->decodeInfo[tid].predIncorrect = true;
      toFetch->decodeInfo[tid].squash = true;
      toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
-    toFetch->decodeInfo[tid].branchTaken = true;
+    toFetch->decodeInfo[tid].branchTaken =
+        inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
  
+    // Might have to tell fetch to unblock.
      if (decodeStatus[tid] == Blocked ||
          decodeStatus[tid] == Unblocking) {
          toFetch->decodeUnblock[tid] = 1;
@@ -292,11 +298,12 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
          }
      }
  
+    // Clear the instruction list and skid buffer in case they have any
+    // insts in them.
      while (!insts[tid].empty()) {
          insts[tid].pop();
      }
  
-    // Clear the skid buffer in case it has any data in it.
      while (!skidBuffer[tid].empty()) {
          skidBuffer[tid].pop();
      }
@@ -341,11 +348,12 @@ DefaultDecode<Impl>::squash(unsigned tid)
          }
      }
  
+    // Clear the instruction list and skid buffer in case they have any
+    // insts in them.
      while (!insts[tid].empty()) {
          insts[tid].pop();
      }
  
-    // Clear the skid buffer in case it has any data in it.
      while (!skidBuffer[tid].empty()) {
          skidBuffer[tid].pop();
      }
diff --git a/cpu/o3/dep_graph.hh b/cpu/o3/dep_graph.hh

index f8ae38da42bab822dec0c6c31fc0e52d6451dff4..b6c5f1ab1be86227395a99f7462c9d5d572bf7d0 100644 (file)
--- a/cpu/o3/dep_graph.hh
+++ b/cpu/o3/dep_graph.hh
@@ -4,6 +4,7 @@
  
  #include "cpu/o3/comm.hh"
  
+/** Node in a linked list. */
  template <class DynInstPtr>
  class DependencyEntry
  {
@@ -18,32 +19,50 @@ class DependencyEntry
      DependencyEntry<DynInstPtr> *next;
  };
  
+/** Array of linked lists that maintains the dependencies between
+ * producing instructions and consuming instructions.  Each linked
+ * list represents a single physical register, having the future
+ * producer of the register's value, and all consumers waiting on that
+ * value on the list.  The head node of each linked list represents
+ * the producing instruction of that register.  Instructions are put
+ * on the list upon reaching the IQ, and are removed from the list
+ * either when the producer completes, or the instruction is squashed.
+ */
  template <class DynInstPtr>
  class DependencyGraph
  {
    public:
      typedef DependencyEntry<DynInstPtr> DepEntry;
  
+    /** Default construction.  Must call resize() prior to use. */
      DependencyGraph()
          : numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
      { }
  
+    /** Resize the dependency graph to have num_entries registers. */
      void resize(int num_entries);
  
+    /** Clears all of the linked lists. */
      void reset();
  
+    /** Inserts an instruction to be dependent on the given index. */
      void insert(PhysRegIndex idx, DynInstPtr &new_inst);
  
+    /** Sets the producing instruction of a given register. */
      void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
      { dependGraph[idx].inst = new_inst; }
  
+    /** Clears the producing instruction. */
      void clearInst(PhysRegIndex idx)
      { dependGraph[idx].inst = NULL; }
  
+    /** Removes an instruction from a single linked list. */
      void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
  
+    /** Removes and returns the newest dependent of a specific register. */
      DynInstPtr pop(PhysRegIndex idx);
  
+    /** Checks if there are any dependents on a specific register. */
      bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
  
      /** Debugging function to dump out the dependency graph.
@@ -59,13 +78,16 @@ class DependencyGraph
       */
      DepEntry *dependGraph;
  
+    /** Number of linked lists; identical to the number of registers. */
      int numEntries;
  
      // Debug variable, remove when done testing.
      unsigned memAllocCounter;
  
    public:
+    // Debug variable, remove when done testing.
      uint64_t nodesTraversed;
+    // Debug variable, remove when done testing.
      uint64_t nodesRemoved;
  };
  
diff --git a/cpu/o3/fetch.hh b/cpu/o3/fetch.hh

index 3fcfdc3a171fee9ca87d1fd05fab7966999222bb..92a87ab546688be070af0f692e1742ef854c36c0 100644 (file)
--- a/cpu/o3/fetch.hh
+++ b/cpu/o3/fetch.hh
@@ -42,7 +42,7 @@ class Sampler;
   * width is specified by the parameters; each cycle it tries to fetch
   * that many instructions. It supports using a branch predictor to
   * predict direction and targets.
- * It supports the idling functionalitiy of the CPU by indicating to
+ * It supports the idling functionality of the CPU by indicating to
   * the CPU when it is active and inactive.
   */
  template <class Impl>
@@ -163,14 +163,19 @@ class DefaultFetch
      /** Processes cache completion event. */
      void processCacheCompletion(MemReqPtr &req);
  
+    /** Begins the switch out of the fetch stage. */
      void switchOut();
  
+    /** Completes the switch out of the fetch stage. */
      void doSwitchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  
+    /** Checks if the fetch stage is switched out. */
      bool isSwitchedOut() { return switchedOut; }
  
+    /** Tells fetch to wake up from a quiesce instruction. */
      void wakeFromQuiesce();
  
    private:
@@ -301,8 +306,10 @@ class DefaultFetch
      /** BPredUnit. */
      BPredUnit branchPred;
  
+    /** Per-thread fetch PC. */
      Addr PC[Impl::MaxThreads];
  
+    /** Per-thread next PC. */
      Addr nextPC[Impl::MaxThreads];
  
      /** Memory request used to access cache. */
@@ -369,8 +376,12 @@ class DefaultFetch
      /** Thread ID being fetched. */
      int threadFetched;
  
+    /** Records whether an interrupt is pending.  If there is, fetch
+     * must stop once it is not fetching PAL instructions.
+     */
      bool interruptPending;
  
+    /** Records if fetch is switched out. */
      bool switchedOut;
  
  #if !FULL_SYSTEM
@@ -394,17 +405,23 @@ class DefaultFetch
       * the pipeline.
       */
      Stats::Scalar<> fetchIdleCycles;
+    /** Total number of cycles spent blocked. */
      Stats::Scalar<> fetchBlockedCycles;
-
+    /** Total number of cycles spent in any other state. */
      Stats::Scalar<> fetchMiscStallCycles;
      /** Stat for total number of fetched cache lines. */
      Stats::Scalar<> fetchedCacheLines;
-
+    /** Total number of outstanding icache accesses that were dropped
+     * due to a squash.
+     */
      Stats::Scalar<> fetchIcacheSquashes;
      /** Distribution of number of instructions fetched each cycle. */
      Stats::Distribution<> fetchNisnDist;
+    /** Rate of how often fetch was idle. */
      Stats::Formula idleRate;
+    /** Number of branch fetches per cycle. */
      Stats::Formula branchRate;
+    /** Number of instructions fetched per cycle. */
      Stats::Formula fetchRate;
  };
  
diff --git a/cpu/o3/fetch_impl.hh b/cpu/o3/fetch_impl.hh

index 1c5e508f6a9da4a7a66e104723307d3021fb2f5a..a309bd49a80cf0566640979448cdf1653c6050d2 100644 (file)
--- a/cpu/o3/fetch_impl.hh
+++ b/cpu/o3/fetch_impl.hh
@@ -161,59 +161,59 @@ void
  DefaultFetch<Impl>::regStats()
  {
      icacheStallCycles
-        .name(name() + ".FETCH:icacheStallCycles")
+        .name(name() + ".icacheStallCycles")
          .desc("Number of cycles fetch is stalled on an Icache miss")
          .prereq(icacheStallCycles);
  
      fetchedInsts
-        .name(name() + ".FETCH:Insts")
+        .name(name() + ".Insts")
          .desc("Number of instructions fetch has processed")
          .prereq(fetchedInsts);
  
      fetchedBranches
-        .name(name() + ".FETCH:Branches")
+        .name(name() + ".Branches")
          .desc("Number of branches that fetch encountered")
          .prereq(fetchedBranches);
  
      predictedBranches
-        .name(name() + ".FETCH:predictedBranches")
+        .name(name() + ".predictedBranches")
          .desc("Number of branches that fetch has predicted taken")
          .prereq(predictedBranches);
  
      fetchCycles
-        .name(name() + ".FETCH:Cycles")
+        .name(name() + ".Cycles")
          .desc("Number of cycles fetch has run and was not squashing or"
                " blocked")
          .prereq(fetchCycles);
  
      fetchSquashCycles
-        .name(name() + ".FETCH:SquashCycles")
+        .name(name() + ".SquashCycles")
          .desc("Number of cycles fetch has spent squashing")
          .prereq(fetchSquashCycles);
  
      fetchIdleCycles
-        .name(name() + ".FETCH:IdleCycles")
+        .name(name() + ".IdleCycles")
          .desc("Number of cycles fetch was idle")
          .prereq(fetchIdleCycles);
  
      fetchBlockedCycles
-        .name(name() + ".FETCH:BlockedCycles")
+        .name(name() + ".BlockedCycles")
          .desc("Number of cycles fetch has spent blocked")
          .prereq(fetchBlockedCycles);
  
      fetchedCacheLines
-        .name(name() + ".FETCH:CacheLines")
+        .name(name() + ".CacheLines")
          .desc("Number of cache lines fetched")
          .prereq(fetchedCacheLines);
  
      fetchMiscStallCycles
-        .name(name() + ".FETCH:MiscStallCycles")
+        .name(name() + ".MiscStallCycles")
          .desc("Number of cycles fetch has spent waiting on interrupts, or "
                "bad addresses, or out of MSHRs")
          .prereq(fetchMiscStallCycles);
  
      fetchIcacheSquashes
-        .name(name() + ".FETCH:IcacheSquashes")
+        .name(name() + ".IcacheSquashes")
          .desc("Number of outstanding Icache misses that were squashed")
          .prereq(fetchIcacheSquashes);
  
@@ -221,24 +221,24 @@ DefaultFetch<Impl>::regStats()
          .init(/* base value */ 0,
                /* last value */ fetchWidth,
                /* bucket size */ 1)
-        .name(name() + ".FETCH:rateDist")
+        .name(name() + ".rateDist")
          .desc("Number of instructions fetched each cycle (Total)")
          .flags(Stats::pdf);
  
      idleRate
-        .name(name() + ".FETCH:idleRate")
+        .name(name() + ".idleRate")
          .desc("Percent of cycles fetch was idle")
          .prereq(idleRate);
      idleRate = fetchIdleCycles * 100 / cpu->numCycles;
  
      branchRate
-        .name(name() + ".FETCH:branchRate")
+        .name(name() + ".branchRate")
          .desc("Number of branch fetches per cycle")
          .flags(Stats::total);
-    branchRate = predictedBranches / cpu->numCycles;
+    branchRate = fetchedBranches / cpu->numCycles;
  
      fetchRate
-        .name(name() + ".FETCH:rate")
+        .name(name() + ".rate")
          .desc("Number of inst fetches per cycle")
          .flags(Stats::total);
      fetchRate = fetchedInsts / cpu->numCycles;
@@ -307,6 +307,7 @@ template<class Impl>
  void
  DefaultFetch<Impl>::initStage()
  {
+    // Setup PC and nextPC with initial state.
      for (int tid = 0; tid < numThreads; tid++) {
          PC[tid] = cpu->readPC(tid);
          nextPC[tid] = cpu->readNextPC(tid);
@@ -323,8 +324,6 @@ DefaultFetch<Impl>::processCacheCompletion(MemReqPtr &req)
  
      // Only change the status if it's still waiting on the icache access
      // to return.
-    // Can keep track of how many cache accesses go unused due to
-    // misspeculation here.
      if (fetchStatus[tid] != IcacheMissStall ||
          req != memReq[tid] ||
          isSwitchedOut()) {
@@ -358,6 +357,7 @@ template <class Impl>
  void
  DefaultFetch<Impl>::switchOut()
  {
+    // Fetch is ready to switch out at any time.
      switchedOut = true;
      cpu->signalSwitched();
  }
@@ -366,6 +366,7 @@ template <class Impl>
  void
  DefaultFetch<Impl>::doSwitchOut()
  {
+    // Branch predictor needs to have its state cleared.
      branchPred.switchOut();
  }
  
@@ -396,6 +397,7 @@ DefaultFetch<Impl>::wakeFromQuiesce()
  {
      DPRINTF(Fetch, "Waking up from quiesce\n");
      // Hopefully this is safe
+    // @todo: Allow other threads to wake from quiesce.
      fetchStatus[0] = Running;
  }
  
@@ -831,7 +833,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(unsigned tid)
          }
      }
  
-    if (checkStall(tid) && fetchStatus[tid] != IcacheMissStall) {
+    if (fetchStatus[tid] != IcacheMissStall && checkStall(tid)) {
          DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);
  
          fetchStatus[tid] = Blocked;
@@ -1199,7 +1201,7 @@ DefaultFetch<Impl>::lsqCount()
  
          if (fetchStatus[high_pri] == Running ||
              fetchStatus[high_pri] == IcacheMissComplete ||
-           fetchStatus[high_pri] == Idle)
+            fetchStatus[high_pri] == Idle)
              return high_pri;
          else
              PQ.pop();
diff --git a/cpu/o3/fu_pool.cc b/cpu/o3/fu_pool.cc

index fb2b5c00d399fe3914ca53364ccd812455b6f879..b28b5d37f7386c0cbfbf6217c21942343c270541 100644 (file)
--- a/cpu/o3/fu_pool.cc
+++ b/cpu/o3/fu_pool.cc
@@ -183,6 +183,8 @@ FUPool::getUnit(OpClass capability)
          }
      }
  
+    assert(fu_idx < numFU);
+
      unitBusy[fu_idx] = true;
  
      return fu_idx;
diff --git a/cpu/o3/fu_pool.hh b/cpu/o3/fu_pool.hh

index da6fdc80216428ff079501409d4012d1a8a49089..052e4832d70706d42a2e844251f21986bfa7c324 100644 (file)
--- a/cpu/o3/fu_pool.hh
+++ b/cpu/o3/fu_pool.hh
@@ -155,7 +155,10 @@ class FUPool : public SimObject
          return maxIssueLatencies[capability];
      }
  
+    /** Switches out functional unit pool. */
      void switchOut();
+
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  };
  
diff --git a/cpu/o3/iew.hh b/cpu/o3/iew.hh

index 93532062801ea69bea38a826678e321679921838..eda6a6bc07611b8b34c00807e692286a22277911 100644 (file)
--- a/cpu/o3/iew.hh
+++ b/cpu/o3/iew.hh
@@ -160,12 +160,16 @@ class DefaultIEW
      /** Sets pointer to the scoreboard. */
      void setScoreboard(Scoreboard *sb_ptr);
  
+    /** Starts switch out of IEW stage. */
      void switchOut();
  
+    /** Completes switch out of IEW stage. */
      void doSwitchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  
+    /** Returns if IEW is switched out. */
      bool isSwitchedOut() { return switchedOut; }
  
      /** Sets page table pointer within LSQ. */
@@ -287,6 +291,7 @@ class DefaultIEW
      void tick();
  
    private:
+    /** Updates execution stats based on the instruction. */
      void updateExeInstStats(DynInstPtr &inst);
  
      /** Pointer to main time buffer used for backwards communication. */
@@ -429,6 +434,7 @@ class DefaultIEW
      /** Maximum size of the skid buffer. */
      unsigned skidBufferMax;
  
+    /** Is this stage switched out. */
      bool switchedOut;
  
      /** Stat for total number of idle cycles. */
@@ -470,9 +476,13 @@ class DefaultIEW
      /** Stat for total number of mispredicted branches detected at execute. */
      Stats::Formula branchMispredicts;
  
+    /** Number of executed software prefetches. */
      Stats::Vector<> exeSwp;
+    /** Number of executed nops. */
      Stats::Vector<> exeNop;
+    /** Number of executed memory references. */
      Stats::Vector<> exeRefs;
+    /** Number of executed branches. */
      Stats::Vector<> exeBranches;
  
  //    Stats::Vector<> issued_ops;
@@ -482,19 +492,30 @@ class DefaultIEW
      Stats::Vector<> dist_unissued;
      Stats::Vector2d<> stat_issued_inst_type;
  */
+    /** Number of instructions issued per cycle. */
      Stats::Formula issueRate;
+    /** Number of executed store instructions. */
      Stats::Formula iewExecStoreInsts;
  //    Stats::Formula issue_op_rate;
  //    Stats::Formula fu_busy_rate;
-
+    /** Number of instructions sent to commit. */
      Stats::Vector<> iewInstsToCommit;
+    /** Number of instructions that writeback. */
      Stats::Vector<> writebackCount;
+    /** Number of instructions that wake consumers. */
      Stats::Vector<> producerInst;
+    /** Number of instructions that wake up from producers. */
      Stats::Vector<> consumerInst;
+    /** Number of instructions that were delayed in writing back due
+     * to resource contention.
+     */
      Stats::Vector<> wbPenalized;
  
+    /** Number of instructions per cycle written back. */
      Stats::Formula wbRate;
+    /** Average number of woken instructions per writeback. */
      Stats::Formula wbFanout;
+    /** Number of instructions per cycle delayed in writing back. */
      Stats::Formula wbPenalizedRate;
  };
  
diff --git a/cpu/o3/iew_impl.hh b/cpu/o3/iew_impl.hh

index b0137d7fca4b66e8d4555417ba36f766423b5087..3ed20cb75ab1311edfcfe4c7572f42dcd60c3a1d 100644 (file)
--- a/cpu/o3/iew_impl.hh
+++ b/cpu/o3/iew_impl.hh
@@ -433,6 +433,7 @@ template <class Impl>
  void
  DefaultIEW<Impl>::switchOut()
  {
+    // IEW is ready to switch out at any time.
      cpu->signalSwitched();
  }
  
@@ -440,6 +441,7 @@ template <class Impl>
  void
  DefaultIEW<Impl>::doSwitchOut()
  {
+    // Clear any state.
      switchedOut = true;
  
      instQueue.switchOut();
@@ -458,6 +460,7 @@ template <class Impl>
  void
  DefaultIEW<Impl>::takeOverFrom()
  {
+    // Reset all state.
      _status = Active;
      exeStatus = Running;
      wbStatus = Idle;
@@ -571,6 +574,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
      toCommit->squashedSeqNum[tid] = inst->seqNum;
      toCommit->nextPC[tid] = inst->readPC();
  
+    // Must include the broadcasted SN in the squash.
      toCommit->includeSquashInst[tid] = true;
  
      ldstQueue.setLoadBlockedHandled(tid);
@@ -1104,6 +1108,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
                  // Store conditionals need to be set as "canCommit()"
                  // so that commit can process them when they reach the
                  // head of commit.
+                // @todo: This is somewhat specific to Alpha.
                  inst->setCanCommit();
                  instQueue.insertNonSpec(inst);
                  add_to_iq = false;
@@ -1363,6 +1368,7 @@ DefaultIEW<Impl>::executeInsts()
          }
      }
  
+    // Update and record activity if we processed any instructions.
      if (inst_num) {
          if (exeStatus == Idle) {
              exeStatus = Running;
@@ -1413,8 +1419,10 @@ DefaultIEW<Impl>::writebackInsts()
                  scoreboard->setReg(inst->renamedDestRegIdx(i));
              }
  
-            producerInst[tid]++;
-            consumerInst[tid]+= dependents;
+            if (dependents) {
+                producerInst[tid]++;
+                consumerInst[tid]+= dependents;
+            }
              writebackCount[tid]++;
          }
      }
@@ -1485,6 +1493,7 @@ DefaultIEW<Impl>::tick()
  
          DPRINTF(IEW,"Processing [tid:%i]\n",tid);
  
+        // Update structures based on instructions committed.
          if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
              !fromCommit->commitInfo[tid].squash &&
              !fromCommit->commitInfo[tid].robSquashing) {
diff --git a/cpu/o3/inst_queue.hh b/cpu/o3/inst_queue.hh

index 518de73d9cfc5ad66c8f4b5c0eb41f31ea95b68d..4802cbaf48ff822ff116654cec1eeb5143862dd1 100644 (file)
--- a/cpu/o3/inst_queue.hh
+++ b/cpu/o3/inst_queue.hh
@@ -92,6 +92,9 @@ class InstructionQueue
          /** Pointer back to the instruction queue. */
          InstructionQueue<Impl> *iqPtr;
  
+        /** Should the FU be added to the list to be freed upon
+         * completing this event.
+         */
          bool freeFU;
  
        public:
@@ -116,6 +119,7 @@ class InstructionQueue
      /** Registers statistics. */
      void regStats();
  
+    /** Resets all instruction queue state. */
      void resetState();
  
      /** Sets CPU pointer. */
@@ -133,10 +137,13 @@ class InstructionQueue
      /** Sets the global time buffer. */
      void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
  
+    /** Switches out the instruction queue. */
      void switchOut();
  
+    /** Takes over execution from another CPU's thread. */
      void takeOverFrom();
  
+    /** Returns if the IQ is switched out. */
      bool isSwitchedOut() { return switchedOut; }
  
      /** Number of entries needed for given amount of threads. */
@@ -171,6 +178,9 @@ class InstructionQueue
       */
      void insertBarrier(DynInstPtr &barr_inst);
  
+    /** Returns the oldest scheduled instruction, and removes it from
+     * the list of instructions waiting to execute.
+     */
      DynInstPtr getInstToExecute();
  
      /**
@@ -274,13 +284,15 @@ class InstructionQueue
      /** List of all the instructions in the IQ (some of which may be issued). */
      std::list<DynInstPtr> instList[Impl::MaxThreads];
  
+    /** List of instructions that are ready to be executed. */
      std::list<DynInstPtr> instsToExecute;
  
      /**
-     * Struct for comparing entries to be added to the priority queue.  This
-     * gives reverse ordering to the instructions in terms of sequence
-     * numbers: the instructions with smaller sequence numbers (and hence
-     * are older) will be at the top of the priority queue.
+     * Struct for comparing entries to be added to the priority queue.
+     * This gives reverse ordering to the instructions in terms of
+     * sequence numbers: the instructions with smaller sequence
+     * numbers (and hence are older) will be at the top of the
+     * priority queue.
       */
      struct pqCompare {
          bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
@@ -393,6 +405,7 @@ class InstructionQueue
       */
      unsigned commitToIEWDelay;
  
+    /** Is the IQ switched out. */
      bool switchedOut;
  
      /** The sequence number of the squashed instruction. */
@@ -460,19 +473,28 @@ class InstructionQueue
       */
      Stats::Scalar<> iqSquashedNonSpecRemoved;
  
+    /** Distribution of number of instructions in the queue. */
      Stats::VectorDistribution<> queueResDist;
+    /** Distribution of the number of instructions issued. */
      Stats::Distribution<> numIssuedDist;
+    /** Distribution of the cycles it takes to issue an instruction. */
      Stats::VectorDistribution<> issueDelayDist;
  
+    /** Number of times an instruction could not be issued because a
+     * FU was busy.
+     */
      Stats::Vector<> statFuBusy;
  //    Stats::Vector<> dist_unissued;
+    /** Stat for total number issued for each instruction type. */
      Stats::Vector2d<> statIssuedInstType;
  
+    /** Number of instructions issued per cycle. */
      Stats::Formula issueRate;
  //    Stats::Formula issue_stores;
  //    Stats::Formula issue_op_rate;
-    Stats::Vector<> fuBusy;  //cumulative fu busy
-
+    /** Number of times the FU was busy. */
+    Stats::Vector<> fuBusy;
+    /** Number of times the FU was busy per instruction issued. */
      Stats::Formula fuBusyRate;
  };
  
diff --git a/cpu/o3/inst_queue_impl.hh b/cpu/o3/inst_queue_impl.hh

index f1dc4e01f64c1d971910cf58d02e082e248f46c8..d677a259c0e516dfb1718380d3b38d9da0a22a83 100644 (file)
--- a/cpu/o3/inst_queue_impl.hh
+++ b/cpu/o3/inst_queue_impl.hh
@@ -151,8 +151,10 @@ template <class Impl>
  InstructionQueue<Impl>::~InstructionQueue()
  {
      dependGraph.reset();
+#ifdef DEBUG
      cprintf("Nodes traversed: %i, removed: %i\n",
              dependGraph.nodesTraversed, dependGraph.nodesRemoved);
+#endif
  }
  
  template <class Impl>
@@ -669,14 +671,8 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
      // @todo: Ensure that these FU Completions happen at the beginning
      // of a cycle, otherwise they could add too many instructions to
      // the queue.
-    // @todo: This could break if there's multiple multi-cycle ops
-    // finishing on this cycle.  Maybe implement something like
-    // instToCommit in iew_impl.hh.
      issueToExecuteQueue->access(0)->size++;
      instsToExecute.push_back(inst);
-//    int &size = issueToExecuteQueue->access(0)->size;
-
-//    issueToExecuteQueue->access(0)->insts[size++] = inst;
  }
  
  // @todo: Figure out a better way to remove the squashed items from the
@@ -742,9 +738,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
              }
          }
  
+        // If we have an instruction that doesn't require a FU, or a
+        // valid FU, then schedule for execution.
          if (idx == -2 || idx != -1) {
              if (op_latency == 1) {
-//                i2e_info->insts[exec_queue_slot++] = issuing_inst;
                  i2e_info->size++;
                  instsToExecute.push_back(issuing_inst);
  
@@ -762,14 +759,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
  
                  // @todo: Enforce that issue_latency == 1 or op_latency
                  if (issue_latency > 1) {
+                    // If FU isn't pipelined, then it must be freed
+                    // upon the execution completing.
                      execution->setFreeFU();
                  } else {
-                    // @todo: Not sure I'm accounting for the
-                    // multi-cycle op in a pipelined FU properly, or
-                    // the number of instructions issued in one cycle.
-//                    i2e_info->insts[exec_queue_slot++] = issuing_inst;
-//                    i2e_info->size++;
-
                      // Add the FU onto the list of FU's to be freed next cycle.
                      fuPool->freeUnitNextCycle(idx);
                  }
@@ -814,6 +807,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
      numIssuedDist.sample(total_issued);
      iqInstsIssued+= total_issued;
  
+    // If we issued any instructions, tell the CPU we had activity.
      if (total_issued) {
          cpu->activityThisCycle();
      } else {
@@ -1364,4 +1358,45 @@ InstructionQueue<Impl>::dumpInsts()
              ++num;
          }
      }
+
+    cprintf("Insts to Execute list:\n");
+
+    int num = 0;
+    int valid_num = 0;
+    ListIt inst_list_it = instsToExecute.begin();
+
+    while (inst_list_it != instsToExecute.end())
+    {
+        cprintf("Instruction:%i\n",
+                num);
+        if (!(*inst_list_it)->isSquashed()) {
+            if (!(*inst_list_it)->isIssued()) {
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            } else if ((*inst_list_it)->isMemRef() &&
+                       !(*inst_list_it)->memOpDone) {
+                // Loads that have not been marked as executed
+                // still count towards the total instructions.
+                ++valid_num;
+                cprintf("Count:%i\n", valid_num);
+            }
+        }
+
+        cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
+                "Issued:%i\nSquashed:%i\n",
+                (*inst_list_it)->readPC(),
+                (*inst_list_it)->seqNum,
+                (*inst_list_it)->threadNumber,
+                (*inst_list_it)->isIssued(),
+                (*inst_list_it)->isSquashed());
+
+        if ((*inst_list_it)->isMemRef()) {
+            cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
+        }
+
+        cprintf("\n");
+
+        inst_list_it++;
+        ++num;
+    }
  }
diff --git a/cpu/o3/lsq.hh b/cpu/o3/lsq.hh

index a1eeccbe744ae79ada108fc5787864e8b5ce3a10..b321d45900d590f4f691e42757295a51c60378b9 100644 (file)
--- a/cpu/o3/lsq.hh
+++ b/cpu/o3/lsq.hh
@@ -49,6 +49,7 @@ class LSQ {
      typedef typename Impl::CPUPol::IEW IEW;
      typedef typename Impl::CPUPol::LSQUnit LSQUnit;
  
+    /** SMT policy. */
      enum LSQPolicy {
          Dynamic,
          Partitioned,
@@ -69,8 +70,9 @@ class LSQ {
      void setIEW(IEW *iew_ptr);
      /** Sets the page table pointer. */
  //    void setPageTable(PageTable *pt_ptr);
-
+    /** Switches out the LSQ. */
      void switchOut();
+    /** Takes over execution from another CPU's thread. */
      void takeOverFrom();
  
      /** Number of entries needed for the given amount of threads.*/
@@ -95,9 +97,6 @@ class LSQ {
      /** Executes a load. */
      Fault executeLoad(DynInstPtr &inst);
  
-    Fault executeLoad(int lq_idx, unsigned tid)
-    { return thread[tid].executeLoad(lq_idx); }
-
      /** Executes a store. */
      Fault executeStore(DynInstPtr &inst);
  
diff --git a/cpu/o3/lsq_unit.hh b/cpu/o3/lsq_unit.hh

index 942b4583d8303088c352c8da2f805d0c897e349f..a6afff7431508e2de00b3536063ff00d53beb583 100644 (file)
--- a/cpu/o3/lsq_unit.hh
+++ b/cpu/o3/lsq_unit.hh
@@ -112,10 +112,13 @@ class LSQUnit {
      /** Sets the page table pointer. */
  //    void setPageTable(PageTable *pt_ptr);
  
+    /** Switches out LSQ unit. */
      void switchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  
+    /** Returns if the LSQ is switched out. */
      bool isSwitchedOut() { return switchedOut; }
  
      /** Ticks the LSQ unit, which in this case only resets the number of
@@ -180,12 +183,15 @@ class LSQUnit {
      bool loadBlocked()
      { return isLoadBlocked; }
  
+    /** Clears the signal that a load became blocked. */
      void clearLoadBlocked()
      { isLoadBlocked = false; }
  
+    /** Returns if the blocked load was handled. */
      bool isLoadBlockedHandled()
      { return loadBlockedHandled; }
  
+    /** Records the blocked load as being handled. */
      void setLoadBlockedHandled()
      { loadBlockedHandled = true; }
  
@@ -331,6 +337,7 @@ class LSQUnit {
      /** The number of used cache ports in this cycle. */
      int usedPorts;
  
+    /** Is the LSQ switched out. */
      bool switchedOut;
  
      //list<InstSeqNum> mshrSeqNums;
@@ -350,8 +357,10 @@ class LSQUnit {
      /** Whether or not a load is blocked due to the memory system. */
      bool isLoadBlocked;
  
+    /** Has the blocked load been handled. */
      bool loadBlockedHandled;
  
+    /** The sequence number of the blocked load. */
      InstSeqNum blockedLoadSeqNum;
  
      /** The oldest load that caused a memory ordering violation. */
@@ -452,10 +461,10 @@ LSQUnit<Impl>::read(MemReqPtr &req, T &data, int load_idx)
          cpu->lockFlag = true;
      }
  #endif
-            req->cmd = Read;
-            assert(!req->completionEvent);
-            req->completionEvent = NULL;
-            req->time = curTick;
+    req->cmd = Read;
+    assert(!req->completionEvent);
+    req->completionEvent = NULL;
+    req->time = curTick;
  
      while (store_idx != -1) {
          // End once we've reached the top of the LSQ
diff --git a/cpu/o3/lsq_unit_impl.hh b/cpu/o3/lsq_unit_impl.hh

index 10f2b557274b28ce7c967e301ed35f6bb579c04f..4ee8bb234173399c34ff27c01520244367e4aac5 100644 (file)
--- a/cpu/o3/lsq_unit_impl.hh
+++ b/cpu/o3/lsq_unit_impl.hh
@@ -477,7 +477,6 @@ LSQUnit<Impl>::commitLoad()
      DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
              loadQueue[loadHead]->readPC());
  
-
      loadQueue[loadHead] = NULL;
  
      incrLdIdx(loadHead);
diff --git a/cpu/o3/mem_dep_unit.hh b/cpu/o3/mem_dep_unit.hh

index acbe08ec2dea8a09665b74301a6ef3187fa544e2..bb0406de1164c27b87678c41a1d44e313223e7ae 100644 (file)
--- a/cpu/o3/mem_dep_unit.hh
+++ b/cpu/o3/mem_dep_unit.hh
@@ -84,8 +84,10 @@ class MemDepUnit {
      /** Registers statistics. */
      void regStats();
  
+    /** Switches out the memory dependence predictor. */
      void switchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  
      /** Sets the pointer to the IQ. */
@@ -155,10 +157,12 @@ class MemDepUnit {
              : inst(new_inst), regsReady(false), memDepReady(false),
                completed(false), squashed(false)
          {
+#ifdef DEBUG
              ++memdep_count;
  
              DPRINTF(MemDepUnit, "Memory dependency entry created.  "
                      "memdep_count=%i\n", memdep_count);
+#endif
          }
  
          /** Frees any pointers. */
@@ -167,11 +171,12 @@ class MemDepUnit {
              for (int i = 0; i < dependInsts.size(); ++i) {
                  dependInsts[i] = NULL;
              }
-
+#ifdef DEBUG
              --memdep_count;
  
              DPRINTF(MemDepUnit, "Memory dependency entry deleted.  "
                      "memdep_count=%i\n", memdep_count);
+#endif
          }
  
          /** Returns the name of the memory dependence entry. */
@@ -196,9 +201,11 @@ class MemDepUnit {
          bool squashed;
  
          /** For debugging. */
+#ifdef DEBUG
          static int memdep_count;
          static int memdep_insert;
          static int memdep_erase;
+#endif
      };
  
      /** Finds the memory dependence entry in the hash map. */
@@ -227,9 +234,13 @@ class MemDepUnit {
       */
      MemDepPred depPred;
  
+    /** Is there an outstanding load barrier that loads must wait on. */
      bool loadBarrier;
+    /** The sequence number of the load barrier. */
      InstSeqNum loadBarrierSN;
+    /** Is there an outstanding store barrier that stores must wait on. */
      bool storeBarrier;
+    /** The sequence number of the store barrier. */
      InstSeqNum storeBarrierSN;
  
      /** Pointer to the IQ. */
diff --git a/cpu/o3/mem_dep_unit_impl.hh b/cpu/o3/mem_dep_unit_impl.hh

index 8b195baabfc4ebebc930f54fc93c0ab40cad0c01..595e9293fb577ed4c669942af51c71738eb0c806 100644 (file)
--- a/cpu/o3/mem_dep_unit_impl.hh
+++ b/cpu/o3/mem_dep_unit_impl.hh
@@ -105,6 +105,7 @@ template <class MemDepPred, class Impl>
  void
  MemDepUnit<MemDepPred, Impl>::switchOut()
  {
+    // Clear any state.
      for (int i = 0; i < Impl::MaxThreads; ++i) {
          instList[i].clear();
      }
@@ -116,6 +117,7 @@ template <class MemDepPred, class Impl>
  void
  MemDepUnit<MemDepPred, Impl>::takeOverFrom()
  {
+    // Be sure to reset all state.
      loadBarrier = storeBarrier = false;
      loadBarrierSN = storeBarrierSN = 0;
      depPred.clear();
@@ -146,7 +148,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
      inst_entry->listIt = --(instList[tid].end());
  
      // Check any barriers and the dependence predictor for any
-    // producing stores.
+    // producing memrefs/stores.
      InstSeqNum producing_store;
      if (inst->isLoad() && loadBarrier) {
          producing_store = loadBarrierSN;
@@ -253,6 +255,7 @@ void
  MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
  {
      InstSeqNum barr_sn = barr_inst->seqNum;
+    // Memory barriers block loads and stores, write barriers only stores.
      if (barr_inst->isMemBarrier()) {
          loadBarrier = true;
          loadBarrierSN = barr_sn;
@@ -330,6 +333,7 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
      DynInstPtr temp_inst;
      bool found_inst = false;
  
+    // For now this replay function replays all waiting memory ops.
      while (!instsToReplay.empty()) {
          temp_inst = instsToReplay.front();
  
diff --git a/cpu/o3/rename.hh b/cpu/o3/rename.hh

index 3f1a27bb526b670468e1ef52acb63ca10604142f..4912431add74c18c772bed8ef326ec30959ea0b8 100644 (file)
--- a/cpu/o3/rename.hh
+++ b/cpu/o3/rename.hh
@@ -155,10 +155,13 @@ class DefaultRename
      /** Sets pointer to the scoreboard. */
      void setScoreboard(Scoreboard *_scoreboard);
  
+    /** Switches out the rename stage. */
      void switchOut();
  
+    /** Completes the switch out. */
      void doSwitchOut();
  
+    /** Takes over from another CPU's thread. */
      void takeOverFrom();
  
      /** Squashes all instructions in a thread. */
@@ -243,8 +246,10 @@ class DefaultRename
      /** Checks if any stages are telling rename to block. */
      bool checkStall(unsigned tid);
  
+    /** Gets the number of free entries for a specific thread. */
      void readFreeEntries(unsigned tid);
  
+    /** Checks the signals and updates the status. */
      bool checkSignalsAndUpdate(unsigned tid);
  
      /** Either serializes on the next instruction available in the InstQueue,
@@ -454,8 +459,11 @@ class DefaultRename
      Stats::Scalar<> renameCommittedMaps;
      /** Stat for total number of mappings that were undone due to a squash. */
      Stats::Scalar<> renameUndoneMaps;
+    /** Number of serialize instructions handled. */
      Stats::Scalar<> renamedSerializing;
+    /** Number of instructions marked as temporarily serializing. */
      Stats::Scalar<> renamedTempSerializing;
+    /** Number of instructions inserted into skid buffers. */
      Stats::Scalar<> renameSkidInsts;
  };
  
diff --git a/cpu/o3/rename_impl.hh b/cpu/o3/rename_impl.hh

index b4f1077d1befcb2b9b4a9871392a8f00a713197e..829c995841195a8826e9aae88e0a6a66e9e3cef0 100644 (file)
--- a/cpu/o3/rename_impl.hh
+++ b/cpu/o3/rename_impl.hh
@@ -258,6 +258,7 @@ template <class Impl>
  void
  DefaultRename<Impl>::switchOut()
  {
+    // Rename is ready to switch out at any time.
      cpu->signalSwitched();
  }
  
@@ -265,6 +266,7 @@ template <class Impl>
  void
  DefaultRename<Impl>::doSwitchOut()
  {
+    // Clear any state, fix up the rename map.
      for (int i = 0; i < numThreads; i++) {
          typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
  
diff --git a/cpu/o3/rename_map.hh b/cpu/o3/rename_map.hh

index d7e49ae833942766ce7d007c8e7699e55eb8b73f..1ac6272643f253f83e4ed90dd1a00c400d016e81 100644 (file)
--- a/cpu/o3/rename_map.hh
+++ b/cpu/o3/rename_map.hh
@@ -62,12 +62,13 @@ class SimpleRenameMap
      typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
  
    public:
-    //Constructor
-     SimpleRenameMap() {};
+    /** Default constructor.  init() must be called prior to use. */
+    SimpleRenameMap() {};
  
      /** Destructor. */
      ~SimpleRenameMap();
  
+    /** Initializes rename map with given parameters. */
      void init(unsigned _numLogicalIntRegs,
                unsigned _numPhysicalIntRegs,
                PhysRegIndex &_int_reg_start,
@@ -84,6 +85,7 @@ class SimpleRenameMap
                int id,
                bool bindRegs);
  
+    /** Sets the free list used with this rename map. */
      void setFreeList(SimpleFreeList *fl_ptr);
  
      //Tell rename map to get a free physical register for a given
@@ -149,7 +151,6 @@ class SimpleRenameMap
          { }
      };
  
-    //Change this to private
    private:
      /** Integer rename map. */
      std::vector<RenameEntry> intRenameMap;
diff --git a/cpu/o3/rob.hh b/cpu/o3/rob.hh

index e05eebe5a97bbf7d99cae8059815db3609c930a9..bdbdde32f986548d3122d70771a892325078b339 100644 (file)
--- a/cpu/o3/rob.hh
+++ b/cpu/o3/rob.hh
@@ -95,8 +95,10 @@ class ROB
       */
      void setActiveThreads(std::list<unsigned>* at_ptr);
  
+    /** Switches out the ROB. */
      void switchOut();
  
+    /** Takes over another CPU's thread. */
      void takeOverFrom();
  
      /** Function to insert an instruction into the ROB. Note that whatever
@@ -298,6 +300,7 @@ class ROB
      /** Number of instructions in the ROB. */
      int numInstsInROB;
  
+    /** Dummy instruction returned if there are no insts left. */
      DynInstPtr dummyInst;
  
    private:
diff --git a/cpu/o3/store_set.cc b/cpu/o3/store_set.cc

index 0c957c8c7f1fbac8d3b2da875fca7b4d94988923..67ccf1b5500ac0e4e0dc95ffb8829ce15c3dad39 100644 (file)
--- a/cpu/o3/store_set.cc
+++ b/cpu/o3/store_set.cc
@@ -26,6 +26,7 @@
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
+#include "base/intmath.hh"
  #include "base/trace.hh"
  #include "cpu/o3/store_set.hh"
  
@@ -36,6 +37,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
      DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
              SSITSize, LFSTSize);
  
+    if (!isPowerOf2(SSITSize)) {
+        fatal("Invalid SSIT size!\n");
+    }
+
      SSIT.resize(SSITSize);
  
      validSSIT.resize(SSITSize);
@@ -43,6 +48,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
      for (int i = 0; i < SSITSize; ++i)
          validSSIT[i] = false;
  
+    if (!isPowerOf2(LFSTSize)) {
+        fatal("Invalid LFST size!\n");
+    }
+
      LFST.resize(LFSTSize);
  
      validLFST.resize(LFSTSize);
@@ -318,3 +327,19 @@ StoreSet::clear()
  
      storeList.clear();
  }
+
+void
+StoreSet::dump()
+{
+    cprintf("storeList.size(): %i\n", storeList.size());
+    SeqNumMapIt store_list_it = storeList.begin();
+
+    int num = 0;
+
+    while (store_list_it != storeList.end()) {
+        cprintf("%i: [sn:%lli] SSID:%i\n",
+                num, (*store_list_it).first, (*store_list_it).second);
+        num++;
+        store_list_it++;
+    }
+}
diff --git a/cpu/o3/store_set.hh b/cpu/o3/store_set.hh

index 7189db3abe31acd3706814198022a0acb94c36d8..5f875131c073faa821fa9e053bb43ec2a3043edd 100644 (file)
--- a/cpu/o3/store_set.hh
+++ b/cpu/o3/store_set.hh
@@ -44,58 +44,98 @@ struct ltseqnum {
      }
  };
  
+/**
+ * Implements a store set predictor for determining if memory
+ * instructions are dependent upon each other.  See paper "Memory
+ * Dependence Prediction using Store Sets" by Chrysos and Emer.  SSID
+ * stands for Store Set ID, SSIT stands for Store Set ID Table, and
+ * LFST is Last Fetched Store Table.
+ */
  class StoreSet
  {
    public:
      typedef unsigned SSID;
  
    public:
+    /** Default constructor.  init() must be called prior to use. */
      StoreSet() { };
  
+    /** Creates store set predictor with given table sizes. */
      StoreSet(int SSIT_size, int LFST_size);
  
+    /** Default destructor. */
      ~StoreSet();
  
+    /** Initializes the store set predictor with the given table sizes. */
      void init(int SSIT_size, int LFST_size);
  
+    /** Records a memory ordering violation between the younger load
+     * and the older store. */
      void violation(Addr store_PC, Addr load_PC);
  
+    /** Inserts a load into the store set predictor.  This does nothing but
+     * is included in case other predictors require a similar function.
+     */
      void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
  
+    /** Inserts a store into the store set predictor.  Updates the
+     * LFST if the store has a valid SSID. */
      void insertStore(Addr store_PC, InstSeqNum store_seq_num,
                       unsigned tid);
  
+    /** Checks if the instruction with the given PC is dependent upon
+     * any store.  @return Returns the sequence number of the store
+     * instruction this PC is dependent upon.  Returns 0 if none.
+     */
      InstSeqNum checkInst(Addr PC);
  
+    /** Records this PC/sequence number as issued. */
      void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
  
+    /** Squashes for a specific thread until the given sequence number. */
      void squash(InstSeqNum squashed_num, unsigned tid);
  
+    /** Resets all tables. */
      void clear();
  
+    /** Debug function to dump the contents of the store list. */
+    void dump();
+
    private:
+    /** Calculates the index into the SSIT based on the PC. */
      inline int calcIndex(Addr PC)
      { return (PC >> offsetBits) & indexMask; }
  
+    /** Calculates a Store Set ID based on the PC. */
      inline SSID calcSSID(Addr PC)
      { return ((PC ^ (PC >> 10)) % LFSTSize); }
  
+    /** The Store Set ID Table. */
      std::vector<SSID> SSIT;
  
+    /** Bit vector to tell if the SSIT has a valid entry. */
      std::vector<bool> validSSIT;
  
+    /** Last Fetched Store Table. */
      std::vector<InstSeqNum> LFST;
  
+    /** Bit vector to tell if the LFST has a valid entry. */
      std::vector<bool> validLFST;
  
+    /** Map of stores that have been inserted into the store set, but
+     * not yet issued or squashed.
+     */
      std::map<InstSeqNum, int, ltseqnum> storeList;
  
      typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
  
+    /** Store Set ID Table size, in entries. */
      int SSITSize;
  
+    /** Last Fetched Store Table size, in entries. */
      int LFSTSize;
  
+    /** Mask to obtain the index. */
      int indexMask;
  
      // HACK: Hardcoded for now.
diff --git a/cpu/o3/thread_state.hh b/cpu/o3/thread_state.hh

index 2c9788e4bcb93e4a21675bc6e8f9736b7a322b5a..3f1208ea0c0a834c076d329f20f1e01f40a7a934 100644 (file)
--- a/cpu/o3/thread_state.hh
+++ b/cpu/o3/thread_state.hh
@@ -58,16 +58,26 @@ struct O3ThreadState : public ThreadState {
      typedef ExecContext::Status Status;
      typedef typename Impl::FullCPU FullCPU;
  
+    /** Current status of the thread. */
      Status _status;
  
-    // Current instruction
+    /** Current instruction the thread is committing.  Only set and
+     * used for DTB faults currently.
+     */
      TheISA::MachInst inst;
+
    private:
+    /** Pointer to the CPU. */
      FullCPU *cpu;
    public:
-
+    /** Whether or not the thread is currently in syscall mode, and
+     * thus able to be externally updated without squashing.
+     */
      bool inSyscall;
  
+    /** Whether or not the thread is currently waiting on a trap, and
+     * thus able to be externally updated without squashing.
+     */
      bool trapPending;
  
  #if FULL_SYSTEM
@@ -88,31 +98,44 @@ struct O3ThreadState : public ThreadState {
      { }
  #endif
  
+    /** Pointer to the ExecContext of this thread.  @todo: Don't call
+     * this a proxy. */
      ExecContext *xcProxy;
  
+    /** Returns a pointer to the XC of this thread. */
      ExecContext *getXCProxy() { return xcProxy; }
  
+    /** Returns the status of this thread. */
      Status status() const { return _status; }
  
+    /** Sets the status of this thread. */
      void setStatus(Status new_status) { _status = new_status; }
  
  #if !FULL_SYSTEM
+    /** Returns whether this address is a valid instruction address. */
      bool validInstAddr(Addr addr)
      { return process->validInstAddr(addr); }
  
+    /** Returns whether this address is a valid data address. */
      bool validDataAddr(Addr addr)
      { return process->validDataAddr(addr); }
  #endif
  
-    bool misspeculating() { return false; }
-
+    /** Sets the current instruction being committed. */
      void setInst(TheISA::MachInst _inst) { inst = _inst; }
  
+    /** Reads the number of instructions functionally executed and
+     * committed.
+     */
      Counter readFuncExeInst() { return funcExeInst; }
  
+    /** Sets the total number of instructions functionally executed
+     * and committed.
+     */
      void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
  
  #if !FULL_SYSTEM
+    /** Handles the syscall. */
      void syscall() { process->syscall(xcProxy); }
  #endif
  };
author	Kevin Lim <ktlim@umich.edu>
	Wed, 31 May 2006 15:45:02 +0000 (11:45 -0400)
committer	Kevin Lim <ktlim@umich.edu>
	Wed, 31 May 2006 15:45:02 +0000 (11:45 -0400)
cpu/activity.cc		patch \| blob \| history
cpu/activity.hh		patch \| blob \| history
cpu/base_dyn_inst.cc		patch \| blob \| history
cpu/o3/alpha_cpu.hh		patch \| blob \| history
cpu/o3/alpha_cpu_impl.hh		patch \| blob \| history
cpu/o3/alpha_dyn_inst.hh		patch \| blob \| history
cpu/o3/alpha_dyn_inst_impl.hh		patch \| blob \| history
cpu/o3/alpha_params.hh		patch \| blob \| history
cpu/o3/comm.hh		patch \| blob \| history
cpu/o3/commit.hh		patch \| blob \| history
cpu/o3/commit_impl.hh		patch \| blob \| history
cpu/o3/cpu.hh		patch \| blob \| history
cpu/o3/cpu_policy.hh		patch \| blob \| history
cpu/o3/decode.hh		patch \| blob \| history
cpu/o3/decode_impl.hh		patch \| blob \| history
cpu/o3/dep_graph.hh		patch \| blob \| history
cpu/o3/fetch.hh		patch \| blob \| history
cpu/o3/fetch_impl.hh		patch \| blob \| history
cpu/o3/fu_pool.cc		patch \| blob \| history
cpu/o3/fu_pool.hh		patch \| blob \| history
cpu/o3/iew.hh		patch \| blob \| history
cpu/o3/iew_impl.hh		patch \| blob \| history
cpu/o3/inst_queue.hh		patch \| blob \| history
cpu/o3/inst_queue_impl.hh		patch \| blob \| history
cpu/o3/lsq.hh		patch \| blob \| history
cpu/o3/lsq_unit.hh		patch \| blob \| history
cpu/o3/lsq_unit_impl.hh		patch \| blob \| history
cpu/o3/mem_dep_unit.hh		patch \| blob \| history
cpu/o3/mem_dep_unit_impl.hh		patch \| blob \| history
cpu/o3/rename.hh		patch \| blob \| history
cpu/o3/rename_impl.hh		patch \| blob \| history
cpu/o3/rename_map.hh		patch \| blob \| history
cpu/o3/rob.hh		patch \| blob \| history
cpu/o3/store_set.cc		patch \| blob \| history
cpu/o3/store_set.hh		patch \| blob \| history
cpu/o3/thread_state.hh		patch \| blob \| history