misc: Merge branch 'release-staging-v20.1.0.0' into develop
[gem5.git] / src / cpu / o3 / lsq_impl.hh
index cb40d552e7c8cedced14b94560e89e6622a4aefc..c4cb45ea06a922e222ea75428c5a51d5f4c72248 100644 (file)
@@ -1,4 +1,17 @@
 /*
+ * Copyright (c) 2011-2012, 2014, 2017-2019 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2005-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
  */
 
+#ifndef __CPU_O3_LSQ_IMPL_HH__
+#define __CPU_O3_LSQ_IMPL_HH__
+
 #include <algorithm>
 #include <list>
 #include <string>
 
+#include "base/logging.hh"
+#include "cpu/o3/cpu.hh"
 #include "cpu/o3/lsq.hh"
+#include "debug/Drain.hh"
+#include "debug/Fetch.hh"
+#include "debug/HtmCpu.hh"
+#include "debug/LSQ.hh"
+#include "debug/Writeback.hh"
+#include "params/DerivO3CPU.hh"
 
-template <class Impl>
-Tick
-LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
-{
-    panic("O3CPU model does not work with atomic mode!");
-    return curTick;
-}
+using namespace std;
 
 template <class Impl>
-void
-LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
+    : cpu(cpu_ptr), iewStage(iew_ptr),
+      _cacheBlocked(false),
+      cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
+      cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
+      lsqPolicy(params->smtLSQPolicy),
+      LQEntries(params->LQEntries),
+      SQEntries(params->SQEntries),
+      maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
+                  params->smtLSQThreshold)),
+      maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
+                  params->smtLSQThreshold)),
+      dcachePort(this, cpu_ptr),
+      numThreads(params->numThreads)
 {
-    DPRINTF(LSQ, "LSQ doesn't update things on a recvFunctional.");
-}
-
-template <class Impl>
-void
-LSQ<Impl>::DcachePort::recvStatusChange(Status status)
-{
-    if (status == RangeChange) {
-        if (!snoopRangeSent) {
-            snoopRangeSent = true;
-            sendStatusChange(Port::RangeChange);
-        }
-        return;
-    }
-    panic("O3CPU doesn't expect recvStatusChange callback!");
-}
-
-template <class Impl>
-bool
-LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
-{
-    if (pkt->isResponse()) {
-        lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
-    }
-    else {
-    //else it is a coherence request, maybe you need to do something
-        warn("Recieved a coherence request (Invalidate?), 03CPU doesn't"
-             "update LSQ for these\n");
-    }
-    return true;
-}
-
-template <class Impl>
-void
-LSQ<Impl>::DcachePort::recvRetry()
-{
-    if (lsq->retryTid == -1)
-    {
-        //Squashed, so drop it
-        return;
-    }
-    lsq->thread[lsq->retryTid].recvRetry();
-    // Speculatively clear the retry Tid.  This will get set again if
-    // the LSQUnit was unable to complete its access.
-    lsq->retryTid = -1;
-}
-
-template <class Impl>
-LSQ<Impl>::LSQ(Params *params)
-    : dcachePort(this), LQEntries(params->LQEntries),
-      SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
-      retryTid(-1)
-{
-    DPRINTF(LSQ, "Creating LSQ object.\n");
-
-    dcachePort.snoopRangeSent = false;
+    assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
 
     //**********************************************/
     //************ Handle SMT Parameters ***********/
     //**********************************************/
-    std::string policy = params->smtLSQPolicy;
-
-    //Convert string to lowercase
-    std::transform(policy.begin(), policy.end(), policy.begin(),
-                   (int(*)(int)) tolower);
-
-    //Figure out fetch policy
-    if (policy == "dynamic") {
-        lsqPolicy = Dynamic;
-
-        maxLQEntries = LQEntries;
-        maxSQEntries = SQEntries;
 
+    /* Run SMT olicy checks. */
+        if (lsqPolicy == SMTQueuePolicy::Dynamic) {
         DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
-    } else if (policy == "partitioned") {
-        lsqPolicy = Partitioned;
-
-        //@todo:make work if part_amt doesnt divide evenly.
-        maxLQEntries = LQEntries / numThreads;
-        maxSQEntries = SQEntries / numThreads;
-
+    } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
         DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
-                "%i entries per LQ | %i entries per SQ",
+                "%i entries per LQ | %i entries per SQ\n",
                 maxLQEntries,maxSQEntries);
+    } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
 
-    } else if (policy == "threshold") {
-        lsqPolicy = Threshold;
-
-        assert(params->smtLSQThreshold > LQEntries);
-        assert(params->smtLSQThreshold > SQEntries);
-
-        //Divide up by threshold amount
-        //@todo: Should threads check the max and the total
-        //amount of the LSQ
-        maxLQEntries  = params->smtLSQThreshold;
-        maxSQEntries  = params->smtLSQThreshold;
+        assert(params->smtLSQThreshold > params->LQEntries);
+        assert(params->smtLSQThreshold > params->SQEntries);
 
         DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
-                "%i entries per LQ | %i entries per SQ",
+                "%i entries per LQ | %i entries per SQ\n",
                 maxLQEntries,maxSQEntries);
-
     } else {
-        assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
-                    "Partitioned, Threshold}");
+        panic("Invalid LSQ sharing policy. Options are: Dynamic, "
+                    "Partitioned, Threshold");
     }
 
-    //Initialize LSQs
-    for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+    thread.reserve(numThreads);
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        thread.emplace_back(maxLQEntries, maxSQEntries);
+        thread[tid].init(cpu, iew_ptr, params, this, tid);
         thread[tid].setDcachePort(&dcachePort);
     }
 }
@@ -170,165 +118,136 @@ LSQ<Impl>::name() const
 
 template<class Impl>
 void
-LSQ<Impl>::regStats()
-{
-    //Initialize LSQs
-    for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].regStats();
-    }
-}
-
-template<class Impl>
-void
-LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
 {
     activeThreads = at_ptr;
     assert(activeThreads != 0);
 }
 
-template<class Impl>
+template <class Impl>
 void
-LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
+LSQ<Impl>::drainSanityCheck() const
 {
-    cpu = cpu_ptr;
+    assert(isDrained());
 
-    dcachePort.setName(name());
-
-    for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].setCPU(cpu_ptr);
-    }
+    for (ThreadID tid = 0; tid < numThreads; tid++)
+        thread[tid].drainSanityCheck();
 }
 
-template<class Impl>
-void
-LSQ<Impl>::setIEW(IEW *iew_ptr)
+template <class Impl>
+bool
+LSQ<Impl>::isDrained() const
 {
-    iewStage = iew_ptr;
+    bool drained(true);
 
-    for (int tid=0; tid < numThreads; tid++) {
-        thread[tid].setIEW(iew_ptr);
+    if (!lqEmpty()) {
+        DPRINTF(Drain, "Not drained, LQ not empty.\n");
+        drained = false;
     }
-}
 
-template <class Impl>
-void
-LSQ<Impl>::switchOut()
-{
-    for (int tid = 0; tid < numThreads; tid++) {
-        thread[tid].switchOut();
+    if (!sqEmpty()) {
+        DPRINTF(Drain, "Not drained, SQ not empty.\n");
+        drained = false;
     }
+
+    return drained;
 }
 
 template <class Impl>
 void
 LSQ<Impl>::takeOverFrom()
 {
-    for (int tid = 0; tid < numThreads; tid++) {
-        thread[tid].takeOverFrom();
-    }
-}
+    usedStorePorts = 0;
+    _cacheBlocked = false;
 
-template <class Impl>
-int
-LSQ<Impl>::entryAmount(int num_threads)
-{
-    if (lsqPolicy == Partitioned) {
-        return LQEntries / num_threads;
-    } else {
-        return 0;
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        thread[tid].takeOverFrom();
     }
 }
 
 template <class Impl>
 void
-LSQ<Impl>::resetEntries()
+LSQ<Impl>::tick()
 {
-    if (lsqPolicy != Dynamic || numThreads > 1) {
-        int active_threads = activeThreads->size();
-
-        int maxEntries;
+    // Re-issue loads which got blocked on the per-cycle load ports limit.
+    if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
+        iewStage->cacheUnblocked();
 
-        if (lsqPolicy == Partitioned) {
-            maxEntries = LQEntries / active_threads;
-        } else if (lsqPolicy == Threshold && active_threads == 1) {
-            maxEntries = LQEntries;
-        } else {
-            maxEntries = LQEntries;
-        }
-
-        std::list<unsigned>::iterator threads  = activeThreads->begin();
-        std::list<unsigned>::iterator end = activeThreads->end();
-
-        while (threads != end) {
-            unsigned tid = *threads++;
+    usedLoadPorts = 0;
+    usedStorePorts = 0;
+}
 
-            resizeEntries(maxEntries, tid);
-        }
-    }
+template<class Impl>
+bool
+LSQ<Impl>::cacheBlocked() const
+{
+    return _cacheBlocked;
 }
 
 template<class Impl>
 void
-LSQ<Impl>::removeEntries(unsigned tid)
+LSQ<Impl>::cacheBlocked(bool v)
 {
-    thread[tid].clearLQ();
-    thread[tid].clearSQ();
+    _cacheBlocked = v;
 }
 
 template<class Impl>
-void
-LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+bool
+LSQ<Impl>::cachePortAvailable(bool is_load) const
 {
-    thread[tid].resizeLQ(size);
-    thread[tid].resizeSQ(size);
+    bool ret;
+    if (is_load) {
+        ret  = usedLoadPorts < cacheLoadPorts;
+    } else {
+        ret  = usedStorePorts < cacheStorePorts;
+    }
+    return ret;
 }
 
 template<class Impl>
 void
-LSQ<Impl>::tick()
+LSQ<Impl>::cachePortBusy(bool is_load)
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
-
-    while (threads != end) {
-        unsigned tid = *threads++;
-
-        thread[tid].tick();
+    assert(cachePortAvailable(is_load));
+    if (is_load) {
+        usedLoadPorts++;
+    } else {
+        usedStorePorts++;
     }
 }
 
 template<class Impl>
 void
-LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
 {
-    unsigned tid = load_inst->threadNumber;
+    ThreadID tid = load_inst->threadNumber;
 
     thread[tid].insertLoad(load_inst);
 }
 
 template<class Impl>
 void
-LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
 {
-    unsigned tid = store_inst->threadNumber;
+    ThreadID tid = store_inst->threadNumber;
 
     thread[tid].insertStore(store_inst);
 }
 
 template<class Impl>
 Fault
-LSQ<Impl>::executeLoad(DynInstPtr &inst)
+LSQ<Impl>::executeLoad(const DynInstPtr &inst)
 {
-    unsigned tid = inst->threadNumber;
+    ThreadID tid = inst->threadNumber;
 
     return thread[tid].executeLoad(inst);
 }
 
 template<class Impl>
 Fault
-LSQ<Impl>::executeStore(DynInstPtr &inst)
+LSQ<Impl>::executeStore(const DynInstPtr &inst)
 {
-    unsigned tid = inst->threadNumber;
+    ThreadID tid = inst->threadNumber;
 
     return thread[tid].executeStore(inst);
 }
@@ -337,11 +256,11 @@ template<class Impl>
 void
 LSQ<Impl>::writebackStores()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         if (numStoresToWB(tid) > 0) {
             DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
@@ -357,11 +276,11 @@ bool
 LSQ<Impl>::violation()
 {
     /* Answers: Does Anybody Have a Violation?*/
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         if (thread[tid].violation())
             return true;
@@ -370,17 +289,93 @@ LSQ<Impl>::violation()
     return false;
 }
 
+template <class Impl>
+void
+LSQ<Impl>::recvReqRetry()
+{
+    iewStage->cacheUnblocked();
+    cacheBlocked(false);
+
+    for (ThreadID tid : *activeThreads) {
+        thread[tid].recvRetry();
+    }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::completeDataAccess(PacketPtr pkt)
+{
+    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+    thread[cpu->contextToThread(senderState->contextId())]
+        .completeDataAccess(pkt);
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::recvTimingResp(PacketPtr pkt)
+{
+    if (pkt->isError())
+        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
+                pkt->getAddr());
+
+    auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+    panic_if(!senderState, "Got packet back with unknown sender state\n");
+
+    thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
+
+    if (pkt->isInvalidate()) {
+        // This response also contains an invalidate; e.g. this can be the case
+        // if cmd is ReadRespWithInvalidate.
+        //
+        // The calling order between completeDataAccess and checkSnoop matters.
+        // By calling checkSnoop after completeDataAccess, we ensure that the
+        // fault set by checkSnoop is not lost. Calling writeback (more
+        // specifically inst->completeAcc) in completeDataAccess overwrites
+        // fault, and in case this instruction requires squashing (as
+        // determined by checkSnoop), the ReExec fault set by checkSnoop would
+        // be lost otherwise.
+
+        DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
+                pkt->getAddr());
+
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            thread[tid].checkSnoop(pkt);
+        }
+    }
+    // Update the LSQRequest state (this may delete the request)
+    senderState->request()->packetReplied();
+
+    return true;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
+{
+    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
+            pkt->cmdString());
+
+    // must be a snoop
+    if (pkt->isInvalidate()) {
+        DPRINTF(LSQ, "received invalidation for addr:%#x\n",
+                pkt->getAddr());
+        for (ThreadID tid = 0; tid < numThreads; tid++) {
+            thread[tid].checkSnoop(pkt);
+        }
+    }
+}
+
 template<class Impl>
 int
 LSQ<Impl>::getCount()
 {
     unsigned total = 0;
 
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         total += getCount(tid);
     }
@@ -394,11 +389,11 @@ LSQ<Impl>::numLoads()
 {
     unsigned total = 0;
 
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         total += numLoads(tid);
     }
@@ -412,11 +407,11 @@ LSQ<Impl>::numStores()
 {
     unsigned total = 0;
 
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         total += thread[tid].numStores();
     }
@@ -425,18 +420,18 @@ LSQ<Impl>::numStores()
 }
 
 template<class Impl>
-int
-LSQ<Impl>::numLoadsReady()
+unsigned
+LSQ<Impl>::numFreeLoadEntries()
 {
     unsigned total = 0;
 
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
-        total += thread[tid].numLoadsReady();
+        total += thread[tid].numFreeLoadEntries();
     }
 
     return total;
@@ -444,17 +439,17 @@ LSQ<Impl>::numLoadsReady()
 
 template<class Impl>
 unsigned
-LSQ<Impl>::numFreeEntries()
+LSQ<Impl>::numFreeStoreEntries()
 {
     unsigned total = 0;
 
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
-        total += thread[tid].numFreeEntries();
+        total += thread[tid].numFreeStoreEntries();
     }
 
     return total;
@@ -462,23 +457,27 @@ LSQ<Impl>::numFreeEntries()
 
 template<class Impl>
 unsigned
-LSQ<Impl>::numFreeEntries(unsigned tid)
+LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
+{
+        return thread[tid].numFreeLoadEntries();
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
 {
-    //if( lsqPolicy == Dynamic )
-    //return numFreeEntries();
-    //else
-        return thread[tid].numFreeEntries();
+        return thread[tid].numFreeStoreEntries();
 }
 
 template<class Impl>
 bool
 LSQ<Impl>::isFull()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         if (!(thread[tid].lqFull() || thread[tid].sqFull()))
             return false;
@@ -489,25 +488,66 @@ LSQ<Impl>::isFull()
 
 template<class Impl>
 bool
-LSQ<Impl>::isFull(unsigned tid)
+LSQ<Impl>::isFull(ThreadID tid)
 {
     //@todo: Change to Calculate All Entries for
     //Dynamic Policy
-    if (lsqPolicy == Dynamic)
+    if (lsqPolicy == SMTQueuePolicy::Dynamic)
         return isFull();
     else
         return thread[tid].lqFull() || thread[tid].sqFull();
 }
 
+template<class Impl>
+bool
+LSQ<Impl>::isEmpty() const
+{
+    return lqEmpty() && sqEmpty();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqEmpty() const
+{
+    list<ThreadID>::const_iterator threads = activeThreads->begin();
+    list<ThreadID>::const_iterator end = activeThreads->end();
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        if (!thread[tid].lqEmpty())
+            return false;
+    }
+
+    return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqEmpty() const
+{
+    list<ThreadID>::const_iterator threads = activeThreads->begin();
+    list<ThreadID>::const_iterator end = activeThreads->end();
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        if (!thread[tid].sqEmpty())
+            return false;
+    }
+
+    return true;
+}
+
 template<class Impl>
 bool
 LSQ<Impl>::lqFull()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         if (!thread[tid].lqFull())
             return false;
@@ -518,11 +558,11 @@ LSQ<Impl>::lqFull()
 
 template<class Impl>
 bool
-LSQ<Impl>::lqFull(unsigned tid)
+LSQ<Impl>::lqFull(ThreadID tid)
 {
     //@todo: Change to Calculate All Entries for
     //Dynamic Policy
-    if( lsqPolicy == Dynamic )
+    if (lsqPolicy == SMTQueuePolicy::Dynamic)
         return lqFull();
     else
         return thread[tid].lqFull();
@@ -532,11 +572,11 @@ template<class Impl>
 bool
 LSQ<Impl>::sqFull()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         if (!sqFull(tid))
             return false;
@@ -547,11 +587,11 @@ LSQ<Impl>::sqFull()
 
 template<class Impl>
 bool
-LSQ<Impl>::sqFull(unsigned tid)
+LSQ<Impl>::sqFull(ThreadID tid)
 {
      //@todo: Change to Calculate All Entries for
     //Dynamic Policy
-    if( lsqPolicy == Dynamic )
+    if (lsqPolicy == SMTQueuePolicy::Dynamic)
         return sqFull();
     else
         return thread[tid].sqFull();
@@ -561,11 +601,11 @@ template<class Impl>
 bool
 LSQ<Impl>::isStalled()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         if (!thread[tid].isStalled())
             return false;
@@ -576,9 +616,9 @@ LSQ<Impl>::isStalled()
 
 template<class Impl>
 bool
-LSQ<Impl>::isStalled(unsigned tid)
+LSQ<Impl>::isStalled(ThreadID tid)
 {
-    if( lsqPolicy == Dynamic )
+    if (lsqPolicy == SMTQueuePolicy::Dynamic)
         return isStalled();
     else
         return thread[tid].isStalled();
@@ -588,49 +628,669 @@ template<class Impl>
 bool
 LSQ<Impl>::hasStoresToWB()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
-
-    if (threads == end)
-        return false;
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
-        if (!hasStoresToWB(tid))
-            return false;
+        if (hasStoresToWB(tid))
+            return true;
     }
 
-    return true;
+    return false;
 }
 
 template<class Impl>
 bool
 LSQ<Impl>::willWB()
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
-        if (!willWB(tid))
-            return false;
+        if (willWB(tid))
+            return true;
     }
 
-    return true;
+    return false;
 }
 
 template<class Impl>
 void
-LSQ<Impl>::dumpInsts()
+LSQ<Impl>::dumpInsts() const
 {
-    std::list<unsigned>::iterator threads = activeThreads->begin();
-    std::list<unsigned>::iterator end = activeThreads->end();
+    list<ThreadID>::const_iterator threads = activeThreads->begin();
+    list<ThreadID>::const_iterator end = activeThreads->end();
 
     while (threads != end) {
-        unsigned tid = *threads++;
+        ThreadID tid = *threads++;
 
         thread[tid].dumpInsts();
     }
 }
+
+template<class Impl>
+Fault
+LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
+                       unsigned int size, Addr addr, Request::Flags flags,
+                       uint64_t *res, AtomicOpFunctorPtr amo_op,
+                       const std::vector<bool>& byte_enable)
+{
+    // This comming request can be either load, store or atomic.
+    // Atomic request has a corresponding pointer to its atomic memory
+    // operation
+    bool isAtomic M5_VAR_USED = !isLoad && amo_op;
+
+    ThreadID tid = cpu->contextToThread(inst->contextId());
+    auto cacheLineSize = cpu->cacheLineSize();
+    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
+    LSQRequest* req = nullptr;
+
+    // Atomic requests that access data across cache line boundary are
+    // currently not allowed since the cache does not guarantee corresponding
+    // atomic memory operations to be executed atomically across a cache line.
+    // For ISAs such as x86 that supports cross-cache-line atomic instructions,
+    // the cache needs to be modified to perform atomic update to both cache
+    // lines. For now, such cross-line update is not supported.
+    assert(!isAtomic || (isAtomic && !needs_burst));
+
+    const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
+
+    if (inst->translationStarted()) {
+        req = inst->savedReq;
+        assert(req);
+    } else {
+        if (htm_cmd) {
+            assert(addr == 0x0lu);
+            assert(size == 8);
+            req = new HtmCmdRequest(&thread[tid], inst, flags);
+        } else if (needs_burst) {
+            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
+                    size, flags, data, res);
+        } else {
+            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
+                    size, flags, data, res, std::move(amo_op));
+        }
+        assert(req);
+        if (!byte_enable.empty()) {
+            req->_byteEnable = byte_enable;
+        }
+        inst->setRequest();
+        req->taskId(cpu->taskId());
+
+        // There might be fault from a previous execution attempt if this is
+        // a strictly ordered load
+        inst->getFault() = NoFault;
+
+        req->initiateTranslation();
+    }
+
+    /* This is the place were instructions get the effAddr. */
+    if (req->isTranslationComplete()) {
+        if (req->isMemAccessRequired()) {
+            inst->effAddr = req->getVaddr();
+            inst->effSize = size;
+            inst->effAddrValid(true);
+
+            if (cpu->checker) {
+                inst->reqToVerify = std::make_shared<Request>(*req->request());
+            }
+            Fault fault;
+            if (isLoad)
+                fault = cpu->read(req, inst->lqIdx);
+            else
+                fault = cpu->write(req, data, inst->sqIdx);
+            // inst->getFault() may have the first-fault of a
+            // multi-access split request at this point.
+            // Overwrite that only if we got another type of fault
+            // (e.g. re-exec).
+            if (fault != NoFault)
+                inst->getFault() = fault;
+        } else if (isLoad) {
+            inst->setMemAccPredicate(false);
+            // Commit will have to clean up whatever happened.  Set this
+            // instruction as executed.
+            inst->setExecuted();
+        }
+    }
+
+    if (inst->traceData)
+        inst->traceData->setMem(addr, size, flags);
+
+    return inst->getFault();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
+        ThreadContext* tc, BaseTLB::Mode mode)
+{
+    _fault.push_back(fault);
+    numInTranslationFragments = 0;
+    numTranslatedFragments = 1;
+    /* If the instruction has been squahsed, let the request know
+     * as it may have to self-destruct. */
+    if (_inst->isSquashed()) {
+        this->squashTranslation();
+    } else {
+        _inst->strictlyOrdered(req->isStrictlyOrdered());
+
+        flags.set(Flag::TranslationFinished);
+        if (fault == NoFault) {
+            _inst->physEffAddr = req->getPaddr();
+            _inst->memReqFlags = req->getFlags();
+            if (req->isCondSwap()) {
+                assert(_res);
+                req->setExtraData(*_res);
+            }
+            setState(State::Request);
+        } else {
+            setState(State::Fault);
+        }
+
+        LSQRequest::_inst->fault = fault;
+        LSQRequest::_inst->translationCompleted(true);
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
+        ThreadContext* tc, BaseTLB::Mode mode)
+{
+    int i;
+    for (i = 0; i < _requests.size() && _requests[i] != req; i++);
+    assert(i < _requests.size());
+    _fault[i] = fault;
+
+    numInTranslationFragments--;
+    numTranslatedFragments++;
+
+    if (fault == NoFault)
+        mainReq->setFlags(req->getFlags());
+
+    if (numTranslatedFragments == _requests.size()) {
+        if (_inst->isSquashed()) {
+            this->squashTranslation();
+        } else {
+            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
+            flags.set(Flag::TranslationFinished);
+            _inst->translationCompleted(true);
+
+            for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
+            if (i > 0) {
+                _inst->physEffAddr = request(0)->getPaddr();
+                _inst->memReqFlags = mainReq->getFlags();
+                if (mainReq->isCondSwap()) {
+                    assert (i == _fault.size());
+                    assert(_res);
+                    mainReq->setExtraData(*_res);
+                }
+                if (i == _fault.size()) {
+                    _inst->fault = NoFault;
+                    setState(State::Request);
+                } else {
+                  _inst->fault = _fault[i];
+                  setState(State::PartialFault);
+                }
+            } else {
+                _inst->fault = _fault[0];
+                setState(State::Fault);
+            }
+        }
+
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::initiateTranslation()
+{
+    assert(_requests.size() == 0);
+
+    this->addRequest(_addr, _size, _byteEnable);
+
+    if (_requests.size() > 0) {
+        _requests.back()->setReqInstSeqNum(_inst->seqNum);
+        _requests.back()->taskId(_taskId);
+        _inst->translationStarted(true);
+        setState(State::Translation);
+        flags.set(Flag::TranslationStarted);
+
+        _inst->savedReq = this;
+        sendFragmentToTranslation(0);
+    } else {
+        _inst->setMemAccPredicate(false);
+    }
+}
+
+template<class Impl>
+PacketPtr
+LSQ<Impl>::SplitDataRequest::mainPacket()
+{
+    return _mainPacket;
+}
+
+template<class Impl>
+RequestPtr
+LSQ<Impl>::SplitDataRequest::mainRequest()
+{
+    return mainReq;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::initiateTranslation()
+{
+    auto cacheLineSize = _port.cacheLineSize();
+    Addr base_addr = _addr;
+    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
+    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
+    uint32_t size_so_far = 0;
+
+    mainReq = std::make_shared<Request>(base_addr,
+                _size, _flags, _inst->requestorId(),
+                _inst->instAddr(), _inst->contextId());
+    if (!_byteEnable.empty()) {
+        mainReq->setByteEnable(_byteEnable);
+    }
+
+    // Paddr is not used in mainReq. However, we will accumulate the flags
+    // from the sub requests into mainReq by calling setFlags() in finish().
+    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
+    // avoid a potential assert in setFlags() when we call it from  finish().
+    mainReq->setPaddr(0);
+
+    /* Get the pre-fix, possibly unaligned. */
+    if (_byteEnable.empty()) {
+        this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
+    } else {
+        auto it_start = _byteEnable.begin();
+        auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+        this->addRequest(base_addr, next_addr - base_addr,
+                         std::vector<bool>(it_start, it_end));
+    }
+    size_so_far = next_addr - base_addr;
+
+    /* We are block aligned now, reading whole blocks. */
+    base_addr = next_addr;
+    while (base_addr != final_addr) {
+        if (_byteEnable.empty()) {
+            this->addRequest(base_addr, cacheLineSize, _byteEnable);
+        } else {
+            auto it_start = _byteEnable.begin() + size_so_far;
+            auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+            this->addRequest(base_addr, cacheLineSize,
+                             std::vector<bool>(it_start, it_end));
+        }
+        size_so_far += cacheLineSize;
+        base_addr += cacheLineSize;
+    }
+
+    /* Deal with the tail. */
+    if (size_so_far < _size) {
+        if (_byteEnable.empty()) {
+            this->addRequest(base_addr, _size - size_so_far, _byteEnable);
+        } else {
+            auto it_start = _byteEnable.begin() + size_so_far;
+            auto it_end = _byteEnable.end();
+            this->addRequest(base_addr, _size - size_so_far,
+                             std::vector<bool>(it_start, it_end));
+        }
+    }
+
+    if (_requests.size() > 0) {
+        /* Setup the requests and send them to translation. */
+        for (auto& r: _requests) {
+            r->setReqInstSeqNum(_inst->seqNum);
+            r->taskId(_taskId);
+        }
+
+        _inst->translationStarted(true);
+        setState(State::Translation);
+        flags.set(Flag::TranslationStarted);
+        this->_inst->savedReq = this;
+        numInTranslationFragments = 0;
+        numTranslatedFragments = 0;
+        _fault.resize(_requests.size());
+
+        for (uint32_t i = 0; i < _requests.size(); i++) {
+            sendFragmentToTranslation(i);
+        }
+    } else {
+        _inst->setMemAccPredicate(false);
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
+{
+    numInTranslationFragments++;
+    _port.dTLB()->translateTiming(
+            this->request(i),
+            this->_inst->thread->getTC(), this,
+            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
+{
+    assert(_numOutstandingPackets == 1);
+    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+    flags.set(Flag::Complete);
+    state->outstanding--;
+    assert(pkt == _packets.front());
+    _port.completeDataAccess(pkt);
+    return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
+{
+    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+    uint32_t pktIdx = 0;
+    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
+        pktIdx++;
+    assert(pktIdx < _packets.size());
+    numReceivedPackets++;
+    state->outstanding--;
+    if (numReceivedPackets == _packets.size()) {
+        flags.set(Flag::Complete);
+        /* Assemble packets. */
+        PacketPtr resp = isLoad()
+            ? Packet::createRead(mainReq)
+            : Packet::createWrite(mainReq);
+        if (isLoad())
+            resp->dataStatic(_inst->memData);
+        else
+            resp->dataStatic(_data);
+        resp->senderState = _senderState;
+        _port.completeDataAccess(resp);
+        delete resp;
+    }
+    return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::buildPackets()
+{
+    assert(_senderState);
+    /* Retries do not create new packets. */
+    if (_packets.size() == 0) {
+        _packets.push_back(
+                isLoad()
+                    ?  Packet::createRead(request())
+                    :  Packet::createWrite(request()));
+        _packets.back()->dataStatic(_inst->memData);
+        _packets.back()->senderState = _senderState;
+
+        // hardware transactional memory
+        // If request originates in a transaction (not necessarily a HtmCmd),
+        // then the packet should be marked as such.
+        if (_inst->inHtmTransactionalState()) {
+            _packets.back()->setHtmTransactional(
+                _inst->getHtmTransactionUid());
+
+            DPRINTF(HtmCpu,
+              "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
+              isLoad() ? "LD" : "ST",
+              _inst->instAddr(),
+              _packets.back()->req->hasVaddr() ?
+                  _packets.back()->req->getVaddr() : 0lu,
+              _packets.back()->getAddr(),
+              _inst->getHtmTransactionUid());
+        }
+    }
+    assert(_packets.size() == 1);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::buildPackets()
+{
+    /* Extra data?? */
+    Addr base_address = _addr;
+
+    if (_packets.size() == 0) {
+        /* New stuff */
+        if (isLoad()) {
+            _mainPacket = Packet::createRead(mainReq);
+            _mainPacket->dataStatic(_inst->memData);
+
+            // hardware transactional memory
+            // If request originates in a transaction,
+            // packet should be marked as such
+            if (_inst->inHtmTransactionalState()) {
+                _mainPacket->setHtmTransactional(
+                    _inst->getHtmTransactionUid());
+                DPRINTF(HtmCpu,
+                  "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+                  _inst->instAddr(),
+                  _mainPacket->req->hasVaddr() ?
+                      _mainPacket->req->getVaddr() : 0lu,
+                  _mainPacket->getAddr(),
+                  _inst->getHtmTransactionUid());
+            }
+        }
+        for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
+            RequestPtr r = _requests[i];
+            PacketPtr pkt = isLoad() ? Packet::createRead(r)
+                                     : Packet::createWrite(r);
+            ptrdiff_t offset = r->getVaddr() - base_address;
+            if (isLoad()) {
+                pkt->dataStatic(_inst->memData + offset);
+            } else {
+                uint8_t* req_data = new uint8_t[r->getSize()];
+                std::memcpy(req_data,
+                        _inst->memData + offset,
+                        r->getSize());
+                pkt->dataDynamic(req_data);
+            }
+            pkt->senderState = _senderState;
+            _packets.push_back(pkt);
+
+            // hardware transactional memory
+            // If request originates in a transaction,
+            // packet should be marked as such
+            if (_inst->inHtmTransactionalState()) {
+                _packets.back()->setHtmTransactional(
+                    _inst->getHtmTransactionUid());
+                DPRINTF(HtmCpu,
+                  "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+                  isLoad() ? "LD" : "ST",
+                  i+1,
+                  _inst->instAddr(),
+                  _packets.back()->req->hasVaddr() ?
+                      _packets.back()->req->getVaddr() : 0lu,
+                  _packets.back()->getAddr(),
+                  _inst->getHtmTransactionUid());
+            }
+        }
+    }
+    assert(_packets.size() > 0);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::sendPacketToCache()
+{
+    assert(_numOutstandingPackets == 0);
+    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
+        _numOutstandingPackets = 1;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::sendPacketToCache()
+{
+    /* Try to send the packets. */
+    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
+            lsqUnit()->trySendPacket(isLoad(),
+                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
+        _numOutstandingPackets++;
+    }
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SingleDataRequest::handleLocalAccess(
+        ThreadContext *thread, PacketPtr pkt)
+{
+    return pkt->req->localAccessor(thread, pkt);
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SplitDataRequest::handleLocalAccess(
+        ThreadContext *thread, PacketPtr mainPkt)
+{
+    Cycles delay(0);
+    unsigned offset = 0;
+
+    for (auto r: _requests) {
+        PacketPtr pkt =
+            new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
+        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
+        Cycles d = r->localAccessor(thread, pkt);
+        if (d > delay)
+            delay = d;
+        offset += r->getSize();
+        delete pkt;
+    }
+    return delay;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+    return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
+}
+
+/**
+ * Caches may probe into the load-store queue to enforce memory ordering
+ * guarantees. This method supports probes by providing a mechanism to compare
+ * snoop messages with requests tracked by the load-store queue.
+ *
+ * Consistency models must enforce ordering constraints. TSO, for instance,
+ * must prevent memory reorderings except stores which are reordered after
+ * loads. The reordering restrictions negatively impact performance by
+ * cutting down on memory level parallelism. However, the core can regain
+ * performance by generating speculative loads. Speculative loads may issue
+ * without affecting correctness if precautions are taken to handle invalid
+ * memory orders. The load queue must squash under memory model violations.
+ * Memory model violations may occur when block ownership is granted to
+ * another core or the block cannot be accurately monitored by the load queue.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+    bool is_hit = false;
+    for (auto &r: _requests) {
+       /**
+        * The load-store queue handles partial faults which complicates this
+        * method. Physical addresses must be compared between requests and
+        * snoops. Some requests will not have a valid physical address, since
+        * partial faults may have outstanding translations. Therefore, the
+        * existence of a valid request address must be checked before
+        * comparing block hits. We assume no pipeline squash is needed if a
+        * valid request address does not exist.
+        */
+        if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
+            is_hit = true;
+            break;
+        }
+    }
+    return is_hit;
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
+{
+    return lsq->recvTimingResp(pkt);
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
+{
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup(tid);
+        }
+    }
+    lsq->recvTimingSnoopReq(pkt);
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvReqRetry()
+{
+    lsq->recvReqRetry();
+}
+
+template<class Impl>
+LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
+                  const DynInstPtr& inst,
+                  const Request::Flags& flags_) :
+    SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
+        nullptr, nullptr, nullptr)
+{
+    assert(_requests.size() == 0);
+
+    this->addRequest(_addr, _size, _byteEnable);
+
+    if (_requests.size() > 0) {
+        _requests.back()->setReqInstSeqNum(_inst->seqNum);
+        _requests.back()->taskId(_taskId);
+        _requests.back()->setPaddr(_addr);
+        _requests.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
+
+        _inst->strictlyOrdered(_requests.back()->isStrictlyOrdered());
+        _inst->fault = NoFault;
+        _inst->physEffAddr = _requests.back()->getPaddr();
+        _inst->memReqFlags = _requests.back()->getFlags();
+        _inst->savedReq = this;
+
+        setState(State::Translation);
+    } else {
+        panic("unexpected behaviour");
+    }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::initiateTranslation()
+{
+    // Transaction commands are implemented as loads to avoid significant
+    // changes to the cpu and memory interfaces
+    // The virtual and physical address uses a dummy value of 0x00
+    // Address translation does not really occur thus the code below
+
+    flags.set(Flag::TranslationStarted);
+    flags.set(Flag::TranslationFinished);
+
+    _inst->translationStarted(true);
+    _inst->translationCompleted(true);
+
+    setState(State::Request);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
+        ThreadContext* tc, BaseTLB::Mode mode)
+{
+    panic("unexpected behaviour");
+}
+
+#endif//__CPU_O3_LSQ_IMPL_HH__