cpu: Add HTM ExecContext API

[gem5.git] / src / cpu / base_dyn_inst.hh
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh

index a9cb60070571eef3866f9e1df2e9b140e09c84ec..bab801935d812b29335398c0360b9ae2924a5091 100644 (file)
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -1,5 +1,6 @@
  /*
- * Copyright (c) 2011 ARM Limited
+ * Copyright (c) 2011, 2013, 2016-2020 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -37,34 +38,33 @@
   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Kevin Lim
- *          Timothy M. Jones
   */
  
  #ifndef __CPU_BASE_DYN_INST_HH__
  #define __CPU_BASE_DYN_INST_HH__
  
+#include <array>
  #include <bitset>
+#include <deque>
  #include <list>
  #include <string>
-#include <queue>
  
+#include "arch/generic/tlb.hh"
  #include "arch/utility.hh"
  #include "base/trace.hh"
  #include "config/the_isa.hh"
  #include "cpu/checker/cpu.hh"
-#include "cpu/o3/comm.hh"
+#include "cpu/exec_context.hh"
  #include "cpu/exetrace.hh"
+#include "cpu/inst_res.hh"
  #include "cpu/inst_seq.hh"
  #include "cpu/op_class.hh"
  #include "cpu/static_inst.hh"
  #include "cpu/translation.hh"
  #include "mem/packet.hh"
+#include "mem/request.hh"
  #include "sim/byteswap.hh"
-#include "sim/fault_fwd.hh"
  #include "sim/system.hh"
-#include "sim/tlb.hh"
  
  /**
   * @file
@@ -72,19 +72,17 @@
   */
  
  template <class Impl>
-class BaseDynInst : public RefCounted
+class BaseDynInst : public ExecContext, public RefCounted
  {
    public:
      // Typedef for the CPU.
      typedef typename Impl::CPUType ImplCPU;
      typedef typename ImplCPU::ImplState ImplState;
+    using VecRegContainer = TheISA::VecRegContainer;
  
-    // Logical register index type.
-    typedef TheISA::RegIndex RegIndex;
-    // Integer register type.
-    typedef TheISA::IntReg IntReg;
-    // Floating point register type.
-    typedef TheISA::FloatReg FloatReg;
+    using LSQRequestPtr = typename Impl::CPUPol::LSQ::LSQRequest*;
+    using LQIterator = typename Impl::CPUPol::LSQUnit::LQIterator;
+    using SQIterator = typename Impl::CPUPol::LSQUnit::SQIterator;
  
      // The DynInstPtr type.
      typedef typename Impl::DynInstPtr DynInstPtr;
@@ -98,92 +96,7 @@ class BaseDynInst : public RefCounted
          MaxInstDestRegs = TheISA::MaxInstDestRegs       /// Max dest regs
      };
  
-    /** The StaticInst used by this BaseDynInst. */
-    StaticInstPtr staticInst;
-    StaticInstPtr macroop;
-
-    ////////////////////////////////////////////
-    //
-    // INSTRUCTION EXECUTION
-    //
-    ////////////////////////////////////////////
-    /** InstRecord that tracks this instructions. */
-    Trace::InstRecord *traceData;
-
-    void demapPage(Addr vaddr, uint64_t asn)
-    {
-        cpu->demapPage(vaddr, asn);
-    }
-    void demapInstPage(Addr vaddr, uint64_t asn)
-    {
-        cpu->demapPage(vaddr, asn);
-    }
-    void demapDataPage(Addr vaddr, uint64_t asn)
-    {
-        cpu->demapPage(vaddr, asn);
-    }
-
-    Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags);
-
-    Fault writeMem(uint8_t *data, unsigned size,
-                   Addr addr, unsigned flags, uint64_t *res);
-
-    /** Splits a request in two if it crosses a dcache block. */
-    void splitRequest(RequestPtr req, RequestPtr &sreqLow,
-                      RequestPtr &sreqHigh);
-
-    /** Initiate a DTB address translation. */
-    void initiateTranslation(RequestPtr req, RequestPtr sreqLow,
-                             RequestPtr sreqHigh, uint64_t *res,
-                             BaseTLB::Mode mode);
-
-    /** Finish a DTB address translation. */
-    void finishTranslation(WholeTranslationState *state);
-
-    /** True if the DTB address translation has started. */
-    bool translationStarted;
-
-    /** True if the DTB address translation has completed. */
-    bool translationCompleted;
-
-    /** True if this address was found to match a previous load and they issued
-     * out of order. If that happend, then it's only a problem if an incoming
-     * snoop invalidate modifies the line, in which case we need to squash.
-     * If nothing modified the line the order doesn't matter.
-     */
-    bool possibleLoadViolation;
-
-    /** True if the address hit a external snoop while sitting in the LSQ.
-     * If this is true and a older instruction sees it, this instruction must
-     * reexecute
-     */
-    bool hitExternalSnoop;
-
-    /**
-     * Returns true if the DTB address translation is being delayed due to a hw
-     * page table walk.
-     */
-    bool isTranslationDelayed() const
-    {
-        return (translationStarted && !translationCompleted);
-    }
-
-    /**
-     * Saved memory requests (needed when the DTB address translation is
-     * delayed due to a hw page table walk).
-     */
-    RequestPtr savedReq;
-    RequestPtr savedSreqLow;
-    RequestPtr savedSreqHigh;
-
-    // Need a copy of main request pointer to verify on writes.
-    RequestPtr reqToVerify;
-
-    /** @todo: Consider making this private. */
-  public:
-    /** The sequence number of the instruction. */
-    InstSeqNum seqNum;
-
+  protected:
      enum Status {
          IqEntry,                 /// Instruction is in the IQ
          RobEntry,                /// Instruction is in the ROB
@@ -200,6 +113,9 @@ class BaseDynInst : public RefCounted
          SquashedInIQ,            /// Instruction is squashed in the IQ
          SquashedInLSQ,           /// Instruction is squashed in the LSQ
          SquashedInROB,           /// Instruction is squashed in the ROB
+        PinnedRegsRenamed,       /// Pinned registers are renamed
+        PinnedRegsWritten,       /// Pinned registers are written back
+        PinnedRegsSquashDone,    /// Regs pinning status updated after squash
          RecoverInst,             /// Is a recover instruction
          BlockingInst,            /// Is a blocking instruction
          ThreadsyncWait,          /// Is a thread synchronization instruction
@@ -210,146 +126,253 @@ class BaseDynInst : public RefCounted
          NumStatus
      };
  
-    /** The status of this BaseDynInst.  Several bits can be set. */
-    std::bitset<NumStatus> status;
-
-    /** The thread this instruction is from. */
-    ThreadID threadNumber;
+    enum Flags {
+        NotAnInst,
+        TranslationStarted,
+        TranslationCompleted,
+        PossibleLoadViolation,
+        HitExternalSnoop,
+        EffAddrValid,
+        RecordResult,
+        Predicate,
+        MemAccPredicate,
+        PredTaken,
+        IsStrictlyOrdered,
+        ReqMade,
+        MemOpDone,
+        MaxFlags
+    };
  
-    /** data address space ID, for loads & stores. */
-    short asid;
+  public:
+    /** The sequence number of the instruction. */
+    InstSeqNum seqNum;
  
-    /** How many source registers are ready. */
-    unsigned readyRegs;
+    /** The StaticInst used by this BaseDynInst. */
+    const StaticInstPtr staticInst;
  
      /** Pointer to the Impl's CPU object. */
      ImplCPU *cpu;
  
+    BaseCPU *getCpuPtr() { return cpu; }
+
      /** Pointer to the thread state. */
      ImplState *thread;
  
      /** The kind of fault this instruction has generated. */
      Fault fault;
  
-    /** Pointer to the data for the memory access. */
-    uint8_t *memData;
-
-    /** The effective virtual address (lds & stores only). */
-    Addr effAddr;
-
-    /** The size of the request */
-    Addr effSize;
-
-    /** Is the effective virtual address valid. */
-    bool effAddrValid;
-
-    /** The effective physical address. */
-    Addr physEffAddr;
-
-    /** The memory request flags (from translation). */
-    unsigned memReqFlags;
-
-    union Result {
-        uint64_t integer;
-        double dbl;
-        void set(uint64_t i) { integer = i; }
-        void set(double d) { dbl = d; }
-        void get(uint64_t& i) { i = integer; }
-        void get(double& d) { d = dbl; }
-    };
+    /** InstRecord that tracks this instructions. */
+    Trace::InstRecord *traceData;
  
+  protected:
      /** The result of the instruction; assumes an instruction can have many
       *  destination registers.
       */
-    std::queue<Result> instResult;
+    std::queue<InstResult> instResult;
  
-    /** Records changes to result? */
-    bool recordResult;
+    /** PC state for this instruction. */
+    TheISA::PCState pc;
+
+  private:
+    /* An amalgamation of a lot of boolean values into one */
+    std::bitset<MaxFlags> instFlags;
  
-    /** Did this instruction execute, or is it predicated false */
-    bool predicate;
+    /** The status of this BaseDynInst.  Several bits can be set. */
+    std::bitset<NumStatus> status;
  
    protected:
-    /** PC state for this instruction. */
-    TheISA::PCState pc;
+     /** Whether or not the source register is ready.
+     *  @todo: Not sure this should be here vs the derived class.
+     */
+    std::bitset<MaxInstSrcRegs> _readySrcRegIdx;
+
+  public:
+    /** The thread this instruction is from. */
+    ThreadID threadNumber;
  
+    /** Iterator pointing to this BaseDynInst in the list of all insts. */
+    ListIt instListIt;
+
+    ////////////////////// Branch Data ///////////////
      /** Predicted PC state after this instruction. */
      TheISA::PCState predPC;
  
-    /** If this is a branch that was predicted taken */
-    bool predTaken;
+    /** The Macroop if one exists */
+    const StaticInstPtr macroop;
+
+    /** How many source registers are ready. */
+    uint8_t readyRegs;
  
    public:
+    /////////////////////// Load Store Data //////////////////////
+    /** The effective virtual address (lds & stores only). */
+    Addr effAddr;
  
-#ifdef DEBUG
-    void dumpSNList();
-#endif
+    /** The effective physical address. */
+    Addr physEffAddr;
  
-    /** Whether or not the source register is ready.
-     *  @todo: Not sure this should be here vs the derived class.
+    /** The memory request flags (from translation). */
+    unsigned memReqFlags;
+
+    /** The size of the request */
+    unsigned effSize;
+
+    /** Pointer to the data for the memory access. */
+    uint8_t *memData;
+
+    /** Load queue index. */
+    int16_t lqIdx;
+    LQIterator lqIt;
+
+    /** Store queue index. */
+    int16_t sqIdx;
+    SQIterator sqIt;
+
+
+    /////////////////////// TLB Miss //////////////////////
+    /**
+     * Saved memory request (needed when the DTB address translation is
+     * delayed due to a hw page table walk).
       */
-    bool _readySrcRegIdx[MaxInstSrcRegs];
+    LSQRequestPtr savedReq;
+
+    /////////////////////// Checker //////////////////////
+    // Need a copy of main request pointer to verify on writes.
+    RequestPtr reqToVerify;
  
    protected:
      /** Flattened register index of the destination registers of this
       *  instruction.
       */
-    TheISA::RegIndex _flatDestRegIdx[TheISA::MaxInstDestRegs];
-
-    /** Flattened register index of the source registers of this
-     *  instruction.
-     */
-    TheISA::RegIndex _flatSrcRegIdx[TheISA::MaxInstSrcRegs];
+    std::array<RegId, TheISA::MaxInstDestRegs> _flatDestRegIdx;
  
      /** Physical register index of the destination registers of this
       *  instruction.
       */
-    PhysRegIndex _destRegIdx[TheISA::MaxInstDestRegs];
+    std::array<PhysRegIdPtr, TheISA::MaxInstDestRegs> _destRegIdx;
  
      /** Physical register index of the source registers of this
       *  instruction.
       */
-    PhysRegIndex _srcRegIdx[TheISA::MaxInstSrcRegs];
+    std::array<PhysRegIdPtr, TheISA::MaxInstSrcRegs> _srcRegIdx;
  
      /** Physical register index of the previous producers of the
       *  architected destinations.
       */
-    PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs];
+    std::array<PhysRegIdPtr, TheISA::MaxInstDestRegs> _prevDestRegIdx;
+
+
+  public:
+    /** Records changes to result? */
+    void recordResult(bool f) { instFlags[RecordResult] = f; }
+
+    /** Is the effective virtual address valid. */
+    bool effAddrValid() const { return instFlags[EffAddrValid]; }
+    void effAddrValid(bool b) { instFlags[EffAddrValid] = b; }
+
+    /** Whether or not the memory operation is done. */
+    bool memOpDone() const { return instFlags[MemOpDone]; }
+    void memOpDone(bool f) { instFlags[MemOpDone] = f; }
+
+    bool notAnInst() const { return instFlags[NotAnInst]; }
+    void setNotAnInst() { instFlags[NotAnInst] = true; }
+
+
+    ////////////////////////////////////////////
+    //
+    // INSTRUCTION EXECUTION
+    //
+    ////////////////////////////////////////////
+
+    void demapPage(Addr vaddr, uint64_t asn)
+    {
+        cpu->demapPage(vaddr, asn);
+    }
+    void demapInstPage(Addr vaddr, uint64_t asn)
+    {
+        cpu->demapPage(vaddr, asn);
+    }
+    void demapDataPage(Addr vaddr, uint64_t asn)
+    {
+        cpu->demapPage(vaddr, asn);
+    }
+
+    Fault initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
+            const std::vector<bool>& byte_enable = std::vector<bool>());
+
+    Fault initiateHtmCmd(Request::Flags flags) override;
+
+    Fault writeMem(uint8_t *data, unsigned size, Addr addr,
+                   Request::Flags flags, uint64_t *res,
+                   const std::vector<bool>& byte_enable = std::vector<bool>());
+
+    Fault initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
+                         AtomicOpFunctorPtr amo_op);
+
+    /** True if the DTB address translation has started. */
+    bool translationStarted() const { return instFlags[TranslationStarted]; }
+    void translationStarted(bool f) { instFlags[TranslationStarted] = f; }
+
+    /** True if the DTB address translation has completed. */
+    bool translationCompleted() const { return instFlags[TranslationCompleted]; }
+    void translationCompleted(bool f) { instFlags[TranslationCompleted] = f; }
+
+    /** True if this address was found to match a previous load and they issued
+     * out of order. If that happend, then it's only a problem if an incoming
+     * snoop invalidate modifies the line, in which case we need to squash.
+     * If nothing modified the line the order doesn't matter.
+     */
+    bool possibleLoadViolation() const { return instFlags[PossibleLoadViolation]; }
+    void possibleLoadViolation(bool f) { instFlags[PossibleLoadViolation] = f; }
+
+    /** True if the address hit a external snoop while sitting in the LSQ.
+     * If this is true and a older instruction sees it, this instruction must
+     * reexecute
+     */
+    bool hitExternalSnoop() const { return instFlags[HitExternalSnoop]; }
+    void hitExternalSnoop(bool f) { instFlags[HitExternalSnoop] = f; }
+
+    /**
+     * Returns true if the DTB address translation is being delayed due to a hw
+     * page table walk.
+     */
+    bool isTranslationDelayed() const
+    {
+        return (translationStarted() && !translationCompleted());
+    }
  
    public:
+#ifdef DEBUG
+    void dumpSNList();
+#endif
  
      /** Returns the physical register index of the i'th destination
       *  register.
       */
-    PhysRegIndex renamedDestRegIdx(int idx) const
+    PhysRegIdPtr renamedDestRegIdx(int idx) const
      {
          return _destRegIdx[idx];
      }
  
      /** Returns the physical register index of the i'th source register. */
-    PhysRegIndex renamedSrcRegIdx(int idx) const
+    PhysRegIdPtr renamedSrcRegIdx(int idx) const
      {
+        assert(TheISA::MaxInstSrcRegs > idx);
          return _srcRegIdx[idx];
      }
  
      /** Returns the flattened register index of the i'th destination
       *  register.
       */
-    TheISA::RegIndex flattenedDestRegIdx(int idx) const
+    const RegId& flattenedDestRegIdx(int idx) const
      {
          return _flatDestRegIdx[idx];
      }
  
-    /** Returns the flattened register index of the i'th source register */
-    TheISA::RegIndex flattenedSrcRegIdx(int idx) const
-    {
-        return _flatSrcRegIdx[idx];
-    }
-
      /** Returns the physical register index of the previous physical register
       *  that remapped to the same logical register index.
       */
-    PhysRegIndex prevDestRegIdx(int idx) const
+    PhysRegIdPtr prevDestRegIdx(int idx) const
      {
          return _prevDestRegIdx[idx];
      }
@@ -358,33 +381,28 @@ class BaseDynInst : public RefCounted
       *  the previous physical register that the logical register mapped to.
       */
      void renameDestReg(int idx,
-                       PhysRegIndex renamed_dest,
-                       PhysRegIndex previous_rename)
+                       PhysRegIdPtr renamed_dest,
+                       PhysRegIdPtr previous_rename)
      {
          _destRegIdx[idx] = renamed_dest;
          _prevDestRegIdx[idx] = previous_rename;
+        if (renamed_dest->isPinned())
+            setPinnedRegsRenamed();
      }
  
      /** Renames a source logical register to the physical register which
       *  has/will produce that logical register's result.
       *  @todo: add in whether or not the source register is ready.
       */
-    void renameSrcReg(int idx, PhysRegIndex renamed_src)
+    void renameSrcReg(int idx, PhysRegIdPtr renamed_src)
      {
          _srcRegIdx[idx] = renamed_src;
      }
  
-    /** Flattens a source architectural register index into a logical index.
-     */
-    void flattenSrcReg(int idx, TheISA::RegIndex flattened_src)
-    {
-        _flatSrcRegIdx[idx] = flattened_src;
-    }
-
      /** Flattens a destination architectural register index into a logical
       * index.
       */
-    void flattenDestReg(int idx, TheISA::RegIndex flattened_dest)
+    void flattenDestReg(int idx, const RegId& flattened_dest)
      {
          _flatDestRegIdx[idx] = flattened_dest;
      }
@@ -395,14 +413,14 @@ class BaseDynInst : public RefCounted
       *  @param seq_num The sequence number of the instruction.
       *  @param cpu Pointer to the instruction's CPU.
       */
-    BaseDynInst(StaticInstPtr staticInst, StaticInstPtr macroop,
+    BaseDynInst(const StaticInstPtr &staticInst, const StaticInstPtr &macroop,
                  TheISA::PCState pc, TheISA::PCState predPC,
                  InstSeqNum seq_num, ImplCPU *cpu);
  
      /** BaseDynInst constructor given a StaticInst pointer.
       *  @param _staticInst The StaticInst for this BaseDynInst.
       */
-    BaseDynInst(StaticInstPtr staticInst, StaticInstPtr macroop);
+    BaseDynInst(const StaticInstPtr &staticInst, const StaticInstPtr &macroop);
  
      /** BaseDynInst destructor. */
      ~BaseDynInst();
@@ -419,16 +437,22 @@ class BaseDynInst : public RefCounted
      void dump(std::string &outstring);
  
      /** Read this CPU's ID. */
-    int cpuId() { return cpu->cpuId(); }
+    int cpuId() const { return cpu->cpuId(); }
+
+    /** Read this CPU's Socket ID. */
+    uint32_t socketId() const { return cpu->socketId(); }
  
      /** Read this CPU's data requestor ID */
-    MasterID masterId() { return cpu->dataMasterId(); }
+    MasterID masterId() const { return cpu->dataMasterId(); }
  
      /** Read this context's system-wide ID **/
-    int contextId() { return thread->contextId(); }
+    ContextID contextId() const { return thread->contextId(); }
  
      /** Returns the fault type. */
-    Fault getFault() { return fault; }
+    Fault getFault() const { return fault; }
+    /** TODO: This I added for the LSQRequest side to be able to modify the
+     * fault. There should be a better mechanism in place. */
+    Fault& getFault() { return fault; }
  
      /** Checks whether or not this instruction has had its branch target
       *  calculated yet.  For now it is not utilized and is hacked to be
@@ -457,12 +481,12 @@ class BaseDynInst : public RefCounted
      /** Returns whether the instruction was predicted taken or not. */
      bool readPredTaken()
      {
-        return predTaken;
+        return instFlags[PredTaken];
      }
  
      void setPredTaken(bool predicted_taken)
      {
-        predTaken = predicted_taken;
+        instFlags[PredTaken] = predicted_taken;
      }
  
      /** Returns whether the instruction mispredicted. */
@@ -480,12 +504,14 @@ class BaseDynInst : public RefCounted
      bool isMemRef()       const { return staticInst->isMemRef(); }
      bool isLoad()         const { return staticInst->isLoad(); }
      bool isStore()        const { return staticInst->isStore(); }
+    bool isAtomic()       const { return staticInst->isAtomic(); }
      bool isStoreConditional() const
      { return staticInst->isStoreConditional(); }
      bool isInstPrefetch() const { return staticInst->isInstPrefetch(); }
      bool isDataPrefetch() const { return staticInst->isDataPrefetch(); }
      bool isInteger()      const { return staticInst->isInteger(); }
      bool isFloating()     const { return staticInst->isFloating(); }
+    bool isVector()       const { return staticInst->isVector(); }
      bool isControl()      const { return staticInst->isControl(); }
      bool isCall()         const { return staticInst->isCall(); }
      bool isReturn()       const { return staticInst->isReturn(); }
@@ -515,6 +541,30 @@ class BaseDynInst : public RefCounted
      bool isFirstMicroop() const { return staticInst->isFirstMicroop(); }
      bool isMicroBranch() const { return staticInst->isMicroBranch(); }
  
+    uint64_t getHtmTransactionUid() const override
+    {
+        panic("Not yet implemented\n");
+        return 0;
+    }
+
+    uint64_t newHtmTransactionUid() const override
+    {
+        panic("Not yet implemented\n");
+        return 0;
+    }
+
+    bool inHtmTransactionalState() const override
+    {
+        panic("Not yet implemented\n");
+        return false;
+    }
+
+    uint64_t getHtmTransactionalDepth() const override
+    {
+        panic("Not yet implemented\n");
+        return 0;
+    }
+
      /** Temporarily sets this instruction as a serialize before instruction. */
      void setSerializeBefore() { status.set(SerializeBefore); }
  
@@ -560,75 +610,120 @@ class BaseDynInst : public RefCounted
      // for machines with separate int & FP reg files
      int8_t numFPDestRegs()  const { return staticInst->numFPDestRegs(); }
      int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
+    int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); }
+    int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); }
+    int8_t numVecElemDestRegs() const
+    {
+        return staticInst->numVecElemDestRegs();
+    }
+    int8_t
+    numVecPredDestRegs() const
+    {
+        return staticInst->numVecPredDestRegs();
+    }
  
      /** Returns the logical register index of the i'th destination register. */
-    RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); }
+    const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); }
  
      /** Returns the logical register index of the i'th source register. */
-    RegIndex srcRegIdx(int i) const { return staticInst->srcRegIdx(i); }
+    const RegId& srcRegIdx(int i) const { return staticInst->srcRegIdx(i); }
  
-    /** Pops a result off the instResult queue */
-    template <class T>
-    void popResult(T& t)
+    /** Return the size of the instResult queue. */
+    uint8_t resultSize() { return instResult.size(); }
+
+    /** Pops a result off the instResult queue.
+     * If the result stack is empty, return the default value.
+     * */
+    InstResult popResult(InstResult dflt = InstResult())
      {
          if (!instResult.empty()) {
-            instResult.front().get(t);
+            InstResult t = instResult.front();
              instResult.pop();
+            return t;
+        }
+        return dflt;
+    }
+
+    /** Pushes a result onto the instResult queue. */
+    /** @{ */
+    /** Scalar result. */
+    template<typename T>
+    void setScalarResult(T&& t)
+    {
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::Scalar));
+        }
+    }
+
+    /** Full vector result. */
+    template<typename T>
+    void setVecResult(T&& t)
+    {
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::VecReg));
          }
      }
  
-    /** Read the most recent result stored by this instruction */
-    template <class T>
-    void readResult(T& t)
+    /** Vector element result. */
+    template<typename T>
+    void setVecElemResult(T&& t)
      {
-        instResult.back().get(t);
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::VecElem));
+        }
      }
  
-    /** Pushes a result onto the instResult queue */
-    template <class T>
-    void setResult(T t)
+    /** Predicate result. */
+    template<typename T>
+    void setVecPredResult(T&& t)
      {
-        if (recordResult) {
-            Result instRes;
-            instRes.set(t);
-            instResult.push(instRes);
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                            InstResult::ResultType::VecPredReg));
          }
      }
+    /** @} */
  
      /** Records an integer register being set to a value. */
-    void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
+    void setIntRegOperand(const StaticInst *si, int idx, RegVal val)
      {
-        setResult<uint64_t>(val);
+        setScalarResult(val);
      }
  
-    /** Records an fp register being set to a value. */
-    void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val,
-                            int width)
+    /** Records a CC register being set to a value. */
+    void setCCRegOperand(const StaticInst *si, int idx, RegVal val)
      {
-        if (width == 32 || width == 64) {
-            setResult<double>(val);
-        } else {
-            panic("Unsupported width!");
-        }
+        setScalarResult(val);
      }
  
-    /** Records an fp register being set to a value. */
-    void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
+    /** Record a vector register being set to a value */
+    void setVecRegOperand(const StaticInst *si, int idx,
+            const VecRegContainer& val)
      {
-        setResult<double>(val);
+        setVecResult(val);
      }
  
      /** Records an fp register being set to an integer value. */
-    void setFloatRegOperandBits(const StaticInst *si, int idx, uint64_t val,
-                                int width)
+    void
+    setFloatRegOperandBits(const StaticInst *si, int idx, RegVal val)
      {
-        setResult<uint64_t>(val);
+        setScalarResult(val);
      }
  
-    /** Records an fp register being set to an integer value. */
-    void setFloatRegOperandBits(const StaticInst *si, int idx, uint64_t val)
+    /** Record a vector register being set to a value */
+    void setVecElemOperand(const StaticInst *si, int idx, const VecElem val)
      {
-        setResult<uint64_t>(val);
+        setVecElemResult(val);
+    }
+
+    /** Record a vector register being set to a value */
+    void setVecPredRegOperand(const StaticInst *si, int idx,
+                              const VecPredRegContainer& val)
+    {
+        setVecPredResult(val);
      }
  
      /** Records that one of the source registers is ready. */
@@ -699,7 +794,7 @@ class BaseDynInst : public RefCounted
      bool isCommitted() const { return status[Committed]; }
  
      /** Sets this instruction as squashed. */
-    void setSquashed() { status.set(Squashed); }
+    void setSquashed();
  
      /** Returns whether or not this instruction is squashed. */
      bool isSquashed() const { return status[Squashed]; }
@@ -734,7 +829,7 @@ class BaseDynInst : public RefCounted
      bool isInLSQ() const { return status[LsqEntry]; }
  
      /** Sets this instruction as squashed in the LSQ. */
-    void setSquashedInLSQ() { status.set(SquashedInLSQ);}
+    void setSquashedInLSQ() { status.set(SquashedInLSQ); status.set(Squashed);}
  
      /** Returns whether or not this instruction is squashed in the LSQ. */
      bool isSquashedInLSQ() const { return status[SquashedInLSQ]; }
@@ -757,37 +852,81 @@ class BaseDynInst : public RefCounted
      /** Returns whether or not this instruction is squashed in the ROB. */
      bool isSquashedInROB() const { return status[SquashedInROB]; }
  
+    /** Returns whether pinned registers are renamed */
+    bool isPinnedRegsRenamed() const { return status[PinnedRegsRenamed]; }
+
+    /** Sets the destination registers as renamed */
+    void
+    setPinnedRegsRenamed()
+    {
+        assert(!status[PinnedRegsSquashDone]);
+        assert(!status[PinnedRegsWritten]);
+        status.set(PinnedRegsRenamed);
+    }
+
+    /** Returns whether destination registers are written */
+    bool isPinnedRegsWritten() const { return status[PinnedRegsWritten]; }
+
+    /** Sets destination registers as written */
+    void
+    setPinnedRegsWritten()
+    {
+        assert(!status[PinnedRegsSquashDone]);
+        assert(status[PinnedRegsRenamed]);
+        status.set(PinnedRegsWritten);
+    }
+
+    /** Return whether dest registers' pinning status updated after squash */
+    bool
+    isPinnedRegsSquashDone() const { return status[PinnedRegsSquashDone]; }
+
+    /** Sets dest registers' status updated after squash */
+    void
+    setPinnedRegsSquashDone() {
+        assert(!status[PinnedRegsSquashDone]);
+        status.set(PinnedRegsSquashDone);
+    }
+
      /** Read the PC state of this instruction. */
-    const TheISA::PCState pcState() const { return pc; }
+    TheISA::PCState pcState() const { return pc; }
  
      /** Set the PC state of this instruction. */
-    const void pcState(const TheISA::PCState &val) { pc = val; }
+    void pcState(const TheISA::PCState &val) { pc = val; }
  
      /** Read the PC of this instruction. */
-    const Addr instAddr() const { return pc.instAddr(); }
+    Addr instAddr() const { return pc.instAddr(); }
  
      /** Read the PC of the next instruction. */
-    const Addr nextInstAddr() const { return pc.nextInstAddr(); }
+    Addr nextInstAddr() const { return pc.nextInstAddr(); }
  
      /**Read the micro PC of this instruction. */
-    const Addr microPC() const { return pc.microPC(); }
+    Addr microPC() const { return pc.microPC(); }
  
-    bool readPredicate()
+    bool readPredicate() const
      {
-        return predicate;
+        return instFlags[Predicate];
      }
  
      void setPredicate(bool val)
      {
-        predicate = val;
+        instFlags[Predicate] = val;
  
          if (traceData) {
              traceData->setPredicate(val);
          }
      }
  
-    /** Sets the ASID. */
-    void setASID(short addr_space_id) { asid = addr_space_id; }
+    bool
+    readMemAccPredicate() const
+    {
+        return instFlags[MemAccPredicate];
+    }
+
+    void
+    setMemAccPredicate(bool val)
+    {
+        instFlags[MemAccPredicate] = val;
+    }
  
      /** Sets the thread id. */
      void setTid(ThreadID tid) { threadNumber = tid; }
@@ -796,56 +935,20 @@ class BaseDynInst : public RefCounted
      void setThreadState(ImplState *state) { thread = state; }
  
      /** Returns the thread context. */
-    ThreadContext *tcBase() { return thread->getTC(); }
-
-  private:
-    /** Instruction effective address.
-     *  @todo: Consider if this is necessary or not.
-     */
-    Addr instEffAddr;
-
-    /** Whether or not the effective address calculation is completed.
-     *  @todo: Consider if this is necessary or not.
-     */
-    bool eaCalcDone;
-
-    /** Is this instruction's memory access uncacheable. */
-    bool isUncacheable;
-
-    /** Has this instruction generated a memory request. */
-    bool reqMade;
+    ThreadContext *tcBase() const { return thread->getTC(); }
  
    public:
-    /** Sets the effective address. */
-    void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
-
-    /** Returns the effective address. */
-    const Addr &getEA() const { return instEffAddr; }
-
-    /** Returns whether or not the eff. addr. calculation has been completed. */
-    bool doneEACalc() { return eaCalcDone; }
-
      /** Returns whether or not the eff. addr. source registers are ready. */
-    bool eaSrcsReady();
+    bool eaSrcsReady() const;
  
-    /** Whether or not the memory operation is done. */
-    bool memOpDone;
-
-    /** Is this instruction's memory access uncacheable. */
-    bool uncacheable() { return isUncacheable; }
+    /** Is this instruction's memory access strictly ordered? */
+    bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; }
+    void strictlyOrdered(bool so) { instFlags[IsStrictlyOrdered] = so; }
  
      /** Has this instruction generated a memory request. */
-    bool hasRequest() { return reqMade; }
-
-  public:
-    /** Load queue index. */
-    int16_t lqIdx;
-
-    /** Store queue index. */
-    int16_t sqIdx;
-
-    /** Iterator pointing to this BaseDynInst in the list of all insts. */
-    ListIt instListIt;
+    bool hasRequest() const { return instFlags[ReqMade]; }
+    /** Assert this instruction has generated a memory request. */
+    void setRequest() { instFlags[ReqMade] = true; }
  
      /** Returns iterator to this instruction in the list of all insts. */
      ListIt &getInstListIt() { return instListIt; }
@@ -855,203 +958,72 @@ class BaseDynInst : public RefCounted
  
    public:
      /** Returns the number of consecutive store conditional failures. */
-    unsigned readStCondFailures()
+    unsigned int readStCondFailures() const
      { return thread->storeCondFailures; }
  
      /** Sets the number of consecutive store conditional failures. */
-    void setStCondFailures(unsigned sc_failures)
+    void setStCondFailures(unsigned int sc_failures)
      { thread->storeCondFailures = sc_failures; }
+
+  public:
+    // monitor/mwait funtions
+    void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
+    bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
+    void mwaitAtomic(ThreadContext *tc)
+    { return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
+    AddressMonitor *getAddrMonitor()
+    { return cpu->getCpuAddrMonitor(threadNumber); }
  };
  
  template<class Impl>
  Fault
-BaseDynInst<Impl>::readMem(Addr addr, uint8_t *data,
-                           unsigned size, unsigned flags)
+BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
+                                   Request::Flags flags,
+                                   const std::vector<bool>& byte_enable)
  {
-    reqMade = true;
-    Request *req = NULL;
-    Request *sreqLow = NULL;
-    Request *sreqHigh = NULL;
-
-    if (reqMade && translationStarted) {
-        req = savedReq;
-        sreqLow = savedSreqLow;
-        sreqHigh = savedSreqHigh;
-    } else {
-        req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
-                          thread->contextId(), threadNumber);
-
-        // Only split the request if the ISA supports unaligned accesses.
-        if (TheISA::HasUnalignedMemAcc) {
-            splitRequest(req, sreqLow, sreqHigh);
-        }
-        initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
-    }
-
-    if (translationCompleted) {
-        if (fault == NoFault) {
-            effAddr = req->getVaddr();
-            effSize = size;
-            effAddrValid = true;
-
-            if (cpu->checker) {
-                if (reqToVerify != NULL) {
-                    delete reqToVerify;
-                }
-                reqToVerify = new Request(*req);
-            }
-            fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx);
-        } else {
-            // Commit will have to clean up whatever happened.  Set this
-            // instruction as executed.
-            this->setExecuted();
-        }
-
-        if (fault != NoFault) {
-            // Return a fixed value to keep simulation deterministic even
-            // along misspeculated paths.
-            if (data)
-                bzero(data, size);
-        }
-    }
-
-    if (traceData) {
-        traceData->setAddr(addr);
-    }
-
-    return fault;
+    assert(byte_enable.empty() || byte_enable.size() == size);
+    return cpu->pushRequest(
+            dynamic_cast<typename DynInstPtr::PtrType>(this),
+            /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
+            byte_enable);
  }
  
  template<class Impl>
  Fault
-BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size,
-                            Addr addr, unsigned flags, uint64_t *res)
-{
-    if (traceData) {
-        traceData->setAddr(addr);
-    }
-
-    reqMade = true;
-    Request *req = NULL;
-    Request *sreqLow = NULL;
-    Request *sreqHigh = NULL;
-
-    if (reqMade && translationStarted) {
-        req = savedReq;
-        sreqLow = savedSreqLow;
-        sreqHigh = savedSreqHigh;
-    } else {
-        req = new Request(asid, addr, size, flags, masterId(), this->pc.instAddr(),
-                          thread->contextId(), threadNumber);
-
-        // Only split the request if the ISA supports unaligned accesses.
-        if (TheISA::HasUnalignedMemAcc) {
-            splitRequest(req, sreqLow, sreqHigh);
-        }
-        initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
-    }
-
-    if (fault == NoFault && translationCompleted) {
-        effAddr = req->getVaddr();
-        effSize = size;
-        effAddrValid = true;
-
-        if (cpu->checker) {
-            if (reqToVerify != NULL) {
-                delete reqToVerify;
-            }
-            reqToVerify = new Request(*req);
-        }
-        fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
-    }
-
-    return fault;
-}
-
-template<class Impl>
-inline void
-BaseDynInst<Impl>::splitRequest(RequestPtr req, RequestPtr &sreqLow,
-                                RequestPtr &sreqHigh)
+BaseDynInst<Impl>::initiateHtmCmd(Request::Flags flags)
  {
-    // Check to see if the request crosses the next level block boundary.
-    unsigned block_size = cpu->getDataPort().peerBlockSize();
-    Addr addr = req->getVaddr();
-    Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
-    assert(split_addr <= addr || split_addr - addr < block_size);
-
-    // Spans two blocks.
-    if (split_addr > addr) {
-        req->splitOnVaddr(split_addr, sreqLow, sreqHigh);
-    }
+    panic("Not yet implemented\n");
+    return NoFault;
  }
  
  template<class Impl>
-inline void
-BaseDynInst<Impl>::initiateTranslation(RequestPtr req, RequestPtr sreqLow,
-                                       RequestPtr sreqHigh, uint64_t *res,
-                                       BaseTLB::Mode mode)
+Fault
+BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
+                            Request::Flags flags, uint64_t *res,
+                            const std::vector<bool>& byte_enable)
  {
-    translationStarted = true;
-
-    if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
-        WholeTranslationState *state =
-            new WholeTranslationState(req, NULL, res, mode);
-
-        // One translation if the request isn't split.
-        DataTranslation<BaseDynInstPtr> *trans =
-            new DataTranslation<BaseDynInstPtr>(this, state);
-        cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
-        if (!translationCompleted) {
-            // Save memory requests.
-            savedReq = state->mainReq;
-            savedSreqLow = state->sreqLow;
-            savedSreqHigh = state->sreqHigh;
-        }
-    } else {
-        WholeTranslationState *state =
-            new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
-
-        // Two translations when the request is split.
-        DataTranslation<BaseDynInstPtr> *stransLow =
-            new DataTranslation<BaseDynInstPtr>(this, state, 0);
-        DataTranslation<BaseDynInstPtr> *stransHigh =
-            new DataTranslation<BaseDynInstPtr>(this, state, 1);
-
-        cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
-        cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
-        if (!translationCompleted) {
-            // Save memory requests.
-            savedReq = state->mainReq;
-            savedSreqLow = state->sreqLow;
-            savedSreqHigh = state->sreqHigh;
-        }
-    }
+    assert(byte_enable.empty() || byte_enable.size() == size);
+    return cpu->pushRequest(
+            dynamic_cast<typename DynInstPtr::PtrType>(this),
+            /* st */ false, data, size, addr, flags, res, nullptr,
+            byte_enable);
  }
  
  template<class Impl>
-inline void
-BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
+Fault
+BaseDynInst<Impl>::initiateMemAMO(Addr addr, unsigned size,
+                                  Request::Flags flags,
+                                  AtomicOpFunctorPtr amo_op)
  {
-    fault = state->getFault();
-
-    if (state->isUncacheable())
-        isUncacheable = true;
-
-    if (fault == NoFault) {
-        physEffAddr = state->getPaddr();
-        memReqFlags = state->getFlags();
-
-        if (state->mainReq->isCondSwap()) {
-            assert(state->res);
-            state->mainReq->setExtraData(*state->res);
-        }
-
-    } else {
-        state->deleteReqs();
-    }
-    delete state;
-
-    translationCompleted = true;
+    // atomic memory instructions do not have data to be written to memory yet
+    // since the atomic operations will be executed directly in cache/memory.
+    // Therefore, its `data` field is nullptr.
+    // Atomic memory requests need to carry their `amo_op` fields to cache/
+    // memory
+    return cpu->pushRequest(
+            dynamic_cast<typename DynInstPtr::PtrType>(this),
+            /* atomic */ false, nullptr, size, addr, flags, nullptr,
+            std::move(amo_op));
  }
  
  #endif // __CPU_BASE_DYN_INST_HH__