/*
+ * Copyright (c) 2011-2012, 2014, 2017-2019 ARM Limited
+ * Copyright (c) 2013 Advanced Micro Devices, Inc.
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2005-2006 The Regents of The University of Michigan
* All rights reserved.
*
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
*/
+#ifndef __CPU_O3_LSQ_IMPL_HH__
+#define __CPU_O3_LSQ_IMPL_HH__
+
#include <algorithm>
#include <list>
#include <string>
+#include "base/logging.hh"
+#include "cpu/o3/cpu.hh"
#include "cpu/o3/lsq.hh"
+#include "debug/Drain.hh"
+#include "debug/Fetch.hh"
+#include "debug/HtmCpu.hh"
+#include "debug/LSQ.hh"
+#include "debug/Writeback.hh"
+#include "params/DerivO3CPU.hh"
-template <class Impl>
-Tick
-LSQ<Impl>::DcachePort::recvAtomic(PacketPtr pkt)
-{
- panic("O3CPU model does not work with atomic mode!");
- return curTick;
-}
+using namespace std;
template <class Impl>
-void
-LSQ<Impl>::DcachePort::recvFunctional(PacketPtr pkt)
+LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
+ : cpu(cpu_ptr), iewStage(iew_ptr),
+ _cacheBlocked(false),
+ cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
+ cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
+ lsqPolicy(params->smtLSQPolicy),
+ LQEntries(params->LQEntries),
+ SQEntries(params->SQEntries),
+ maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
+ params->smtLSQThreshold)),
+ maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
+ params->smtLSQThreshold)),
+ dcachePort(this, cpu_ptr),
+ numThreads(params->numThreads)
{
- DPRINTF(LSQ, "LSQ doesn't update things on a recvFunctional.");
-}
-
-template <class Impl>
-void
-LSQ<Impl>::DcachePort::recvStatusChange(Status status)
-{
- if (status == RangeChange) {
- if (!snoopRangeSent) {
- snoopRangeSent = true;
- sendStatusChange(Port::RangeChange);
- }
- return;
- }
- panic("O3CPU doesn't expect recvStatusChange callback!");
-}
-
-template <class Impl>
-bool
-LSQ<Impl>::DcachePort::recvTiming(PacketPtr pkt)
-{
- if (pkt->isResponse()) {
- lsq->thread[pkt->req->getThreadNum()].completeDataAccess(pkt);
- }
- else {
- //else it is a coherence request, maybe you need to do something
- warn("Recieved a coherence request (Invalidate?), 03CPU doesn't"
- "update LSQ for these\n");
- }
- return true;
-}
-
-template <class Impl>
-void
-LSQ<Impl>::DcachePort::recvRetry()
-{
- if (lsq->retryTid == -1)
- {
- //Squashed, so drop it
- return;
- }
- lsq->thread[lsq->retryTid].recvRetry();
- // Speculatively clear the retry Tid. This will get set again if
- // the LSQUnit was unable to complete its access.
- lsq->retryTid = -1;
-}
-
-template <class Impl>
-LSQ<Impl>::LSQ(Params *params)
- : dcachePort(this), LQEntries(params->LQEntries),
- SQEntries(params->SQEntries), numThreads(params->numberOfThreads),
- retryTid(-1)
-{
- DPRINTF(LSQ, "Creating LSQ object.\n");
-
- dcachePort.snoopRangeSent = false;
+ assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
//**********************************************/
//************ Handle SMT Parameters ***********/
//**********************************************/
- std::string policy = params->smtLSQPolicy;
-
- //Convert string to lowercase
- std::transform(policy.begin(), policy.end(), policy.begin(),
- (int(*)(int)) tolower);
-
- //Figure out fetch policy
- if (policy == "dynamic") {
- lsqPolicy = Dynamic;
-
- maxLQEntries = LQEntries;
- maxSQEntries = SQEntries;
+    /* Run SMT policy checks. */
+ if (lsqPolicy == SMTQueuePolicy::Dynamic) {
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
-
- } else if (policy == "partitioned") {
- lsqPolicy = Partitioned;
-
- //@todo:make work if part_amt doesnt divide evenly.
- maxLQEntries = LQEntries / numThreads;
- maxSQEntries = SQEntries / numThreads;
-
+ } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
- "%i entries per LQ | %i entries per SQ",
+ "%i entries per LQ | %i entries per SQ\n",
maxLQEntries,maxSQEntries);
+ } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
- } else if (policy == "threshold") {
- lsqPolicy = Threshold;
-
- assert(params->smtLSQThreshold > LQEntries);
- assert(params->smtLSQThreshold > SQEntries);
-
- //Divide up by threshold amount
- //@todo: Should threads check the max and the total
- //amount of the LSQ
- maxLQEntries = params->smtLSQThreshold;
- maxSQEntries = params->smtLSQThreshold;
+ assert(params->smtLSQThreshold > params->LQEntries);
+ assert(params->smtLSQThreshold > params->SQEntries);
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
- "%i entries per LQ | %i entries per SQ",
+ "%i entries per LQ | %i entries per SQ\n",
maxLQEntries,maxSQEntries);
-
} else {
- assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
- "Partitioned, Threshold}");
+ panic("Invalid LSQ sharing policy. Options are: Dynamic, "
+ "Partitioned, Threshold");
}
- //Initialize LSQs
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid);
+ thread.reserve(numThreads);
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ thread.emplace_back(maxLQEntries, maxSQEntries);
+ thread[tid].init(cpu, iew_ptr, params, this, tid);
thread[tid].setDcachePort(&dcachePort);
}
}
template<class Impl>
void
-LSQ<Impl>::regStats()
-{
- //Initialize LSQs
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].regStats();
- }
-}
-
-template<class Impl>
-void
-LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
+LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
assert(activeThreads != 0);
}
-template<class Impl>
+template <class Impl>
void
-LSQ<Impl>::setCPU(O3CPU *cpu_ptr)
+LSQ<Impl>::drainSanityCheck() const
{
- cpu = cpu_ptr;
+ assert(isDrained());
- dcachePort.setName(name());
-
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].setCPU(cpu_ptr);
- }
+ for (ThreadID tid = 0; tid < numThreads; tid++)
+ thread[tid].drainSanityCheck();
}
-template<class Impl>
-void
-LSQ<Impl>::setIEW(IEW *iew_ptr)
+template <class Impl>
+bool
+LSQ<Impl>::isDrained() const
{
- iewStage = iew_ptr;
+ bool drained(true);
- for (int tid=0; tid < numThreads; tid++) {
- thread[tid].setIEW(iew_ptr);
+ if (!lqEmpty()) {
+ DPRINTF(Drain, "Not drained, LQ not empty.\n");
+ drained = false;
}
-}
-template <class Impl>
-void
-LSQ<Impl>::switchOut()
-{
- for (int tid = 0; tid < numThreads; tid++) {
- thread[tid].switchOut();
+ if (!sqEmpty()) {
+ DPRINTF(Drain, "Not drained, SQ not empty.\n");
+ drained = false;
}
+
+ return drained;
}
template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
- for (int tid = 0; tid < numThreads; tid++) {
- thread[tid].takeOverFrom();
- }
-}
+ usedStorePorts = 0;
+ _cacheBlocked = false;
-template <class Impl>
-int
-LSQ<Impl>::entryAmount(int num_threads)
-{
- if (lsqPolicy == Partitioned) {
- return LQEntries / num_threads;
- } else {
- return 0;
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ thread[tid].takeOverFrom();
}
}
template <class Impl>
void
-LSQ<Impl>::resetEntries()
+LSQ<Impl>::tick()
{
- if (lsqPolicy != Dynamic || numThreads > 1) {
- int active_threads = activeThreads->size();
-
- int maxEntries;
+ // Re-issue loads which got blocked on the per-cycle load ports limit.
+ if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
+ iewStage->cacheUnblocked();
- if (lsqPolicy == Partitioned) {
- maxEntries = LQEntries / active_threads;
- } else if (lsqPolicy == Threshold && active_threads == 1) {
- maxEntries = LQEntries;
- } else {
- maxEntries = LQEntries;
- }
-
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
-
- while (threads != end) {
- unsigned tid = *threads++;
+ usedLoadPorts = 0;
+ usedStorePorts = 0;
+}
- resizeEntries(maxEntries, tid);
- }
- }
+template<class Impl>
+bool
+LSQ<Impl>::cacheBlocked() const
+{
+ return _cacheBlocked;
}
template<class Impl>
void
-LSQ<Impl>::removeEntries(unsigned tid)
+LSQ<Impl>::cacheBlocked(bool v)
{
- thread[tid].clearLQ();
- thread[tid].clearSQ();
+ _cacheBlocked = v;
}
template<class Impl>
-void
-LSQ<Impl>::resizeEntries(unsigned size,unsigned tid)
+bool
+LSQ<Impl>::cachePortAvailable(bool is_load) const
{
- thread[tid].resizeLQ(size);
- thread[tid].resizeSQ(size);
+ bool ret;
+ if (is_load) {
+ ret = usedLoadPorts < cacheLoadPorts;
+ } else {
+ ret = usedStorePorts < cacheStorePorts;
+ }
+ return ret;
}
template<class Impl>
void
-LSQ<Impl>::tick()
+LSQ<Impl>::cachePortBusy(bool is_load)
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
-
- while (threads != end) {
- unsigned tid = *threads++;
-
- thread[tid].tick();
+ assert(cachePortAvailable(is_load));
+ if (is_load) {
+ usedLoadPorts++;
+ } else {
+ usedStorePorts++;
}
}
template<class Impl>
void
-LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
+LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
{
- unsigned tid = load_inst->threadNumber;
+ ThreadID tid = load_inst->threadNumber;
thread[tid].insertLoad(load_inst);
}
template<class Impl>
void
-LSQ<Impl>::insertStore(DynInstPtr &store_inst)
+LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
{
- unsigned tid = store_inst->threadNumber;
+ ThreadID tid = store_inst->threadNumber;
thread[tid].insertStore(store_inst);
}
template<class Impl>
Fault
-LSQ<Impl>::executeLoad(DynInstPtr &inst)
+LSQ<Impl>::executeLoad(const DynInstPtr &inst)
{
- unsigned tid = inst->threadNumber;
+ ThreadID tid = inst->threadNumber;
return thread[tid].executeLoad(inst);
}
template<class Impl>
Fault
-LSQ<Impl>::executeStore(DynInstPtr &inst)
+LSQ<Impl>::executeStore(const DynInstPtr &inst)
{
- unsigned tid = inst->threadNumber;
+ ThreadID tid = inst->threadNumber;
return thread[tid].executeStore(inst);
}
void
LSQ<Impl>::writebackStores()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
if (numStoresToWB(tid) > 0) {
DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
LSQ<Impl>::violation()
{
/* Answers: Does Anybody Have a Violation?*/
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
if (thread[tid].violation())
return true;
return false;
}
+template <class Impl>
+void
+LSQ<Impl>::recvReqRetry()
+{
+ iewStage->cacheUnblocked();
+ cacheBlocked(false);
+
+ for (ThreadID tid : *activeThreads) {
+ thread[tid].recvRetry();
+ }
+}
+
+template <class Impl>
+void
+LSQ<Impl>::completeDataAccess(PacketPtr pkt)
+{
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ thread[cpu->contextToThread(senderState->contextId())]
+ .completeDataAccess(pkt);
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::recvTimingResp(PacketPtr pkt)
+{
+ if (pkt->isError())
+ DPRINTF(LSQ, "Got error packet back for address: %#X\n",
+ pkt->getAddr());
+
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ panic_if(!senderState, "Got packet back with unknown sender state\n");
+
+ thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
+
+ if (pkt->isInvalidate()) {
+ // This response also contains an invalidate; e.g. this can be the case
+ // if cmd is ReadRespWithInvalidate.
+ //
+ // The calling order between completeDataAccess and checkSnoop matters.
+ // By calling checkSnoop after completeDataAccess, we ensure that the
+ // fault set by checkSnoop is not lost. Calling writeback (more
+ // specifically inst->completeAcc) in completeDataAccess overwrites
+ // fault, and in case this instruction requires squashing (as
+ // determined by checkSnoop), the ReExec fault set by checkSnoop would
+ // be lost otherwise.
+
+ DPRINTF(LSQ, "received invalidation with response for addr:%#x\n",
+ pkt->getAddr());
+
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ thread[tid].checkSnoop(pkt);
+ }
+ }
+ // Update the LSQRequest state (this may delete the request)
+ senderState->request()->packetReplied();
+
+ return true;
+}
+
+template <class Impl>
+void
+LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
+{
+ DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
+ pkt->cmdString());
+
+ // must be a snoop
+ if (pkt->isInvalidate()) {
+ DPRINTF(LSQ, "received invalidation for addr:%#x\n",
+ pkt->getAddr());
+ for (ThreadID tid = 0; tid < numThreads; tid++) {
+ thread[tid].checkSnoop(pkt);
+ }
+ }
+}
+
template<class Impl>
int
LSQ<Impl>::getCount()
{
unsigned total = 0;
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
total += getCount(tid);
}
{
unsigned total = 0;
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
total += numLoads(tid);
}
{
unsigned total = 0;
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
total += thread[tid].numStores();
}
}
template<class Impl>
-int
-LSQ<Impl>::numLoadsReady()
+unsigned
+LSQ<Impl>::numFreeLoadEntries()
{
unsigned total = 0;
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
- total += thread[tid].numLoadsReady();
+ total += thread[tid].numFreeLoadEntries();
}
return total;
template<class Impl>
unsigned
-LSQ<Impl>::numFreeEntries()
+LSQ<Impl>::numFreeStoreEntries()
{
unsigned total = 0;
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
- total += thread[tid].numFreeEntries();
+ total += thread[tid].numFreeStoreEntries();
}
return total;
template<class Impl>
unsigned
-LSQ<Impl>::numFreeEntries(unsigned tid)
+LSQ<Impl>::numFreeLoadEntries(ThreadID tid)
+{
+ return thread[tid].numFreeLoadEntries();
+}
+
+template<class Impl>
+unsigned
+LSQ<Impl>::numFreeStoreEntries(ThreadID tid)
{
- //if( lsqPolicy == Dynamic )
- //return numFreeEntries();
- //else
- return thread[tid].numFreeEntries();
+ return thread[tid].numFreeStoreEntries();
}
template<class Impl>
bool
LSQ<Impl>::isFull()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
if (!(thread[tid].lqFull() || thread[tid].sqFull()))
return false;
template<class Impl>
bool
-LSQ<Impl>::isFull(unsigned tid)
+LSQ<Impl>::isFull(ThreadID tid)
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
- if (lsqPolicy == Dynamic)
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return isFull();
else
return thread[tid].lqFull() || thread[tid].sqFull();
}
+template<class Impl>
+bool
+LSQ<Impl>::isEmpty() const
+{
+ return lqEmpty() && sqEmpty();
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::lqEmpty() const
+{
+ list<ThreadID>::const_iterator threads = activeThreads->begin();
+ list<ThreadID>::const_iterator end = activeThreads->end();
+
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ if (!thread[tid].lqEmpty())
+ return false;
+ }
+
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::sqEmpty() const
+{
+ list<ThreadID>::const_iterator threads = activeThreads->begin();
+ list<ThreadID>::const_iterator end = activeThreads->end();
+
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ if (!thread[tid].sqEmpty())
+ return false;
+ }
+
+ return true;
+}
+
template<class Impl>
bool
LSQ<Impl>::lqFull()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
if (!thread[tid].lqFull())
return false;
template<class Impl>
bool
-LSQ<Impl>::lqFull(unsigned tid)
+LSQ<Impl>::lqFull(ThreadID tid)
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
- if( lsqPolicy == Dynamic )
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return lqFull();
else
return thread[tid].lqFull();
bool
LSQ<Impl>::sqFull()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
if (!sqFull(tid))
return false;
template<class Impl>
bool
-LSQ<Impl>::sqFull(unsigned tid)
+LSQ<Impl>::sqFull(ThreadID tid)
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
- if( lsqPolicy == Dynamic )
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return sqFull();
else
return thread[tid].sqFull();
bool
LSQ<Impl>::isStalled()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
if (!thread[tid].isStalled())
return false;
template<class Impl>
bool
-LSQ<Impl>::isStalled(unsigned tid)
+LSQ<Impl>::isStalled(ThreadID tid)
{
- if( lsqPolicy == Dynamic )
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return isStalled();
else
return thread[tid].isStalled();
bool
LSQ<Impl>::hasStoresToWB()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
-
- if (threads == end)
- return false;
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
- if (!hasStoresToWB(tid))
- return false;
+ if (hasStoresToWB(tid))
+ return true;
}
- return true;
+ return false;
}
template<class Impl>
bool
LSQ<Impl>::willWB()
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
- if (!willWB(tid))
- return false;
+ if (willWB(tid))
+ return true;
}
- return true;
+ return false;
}
template<class Impl>
void
-LSQ<Impl>::dumpInsts()
+LSQ<Impl>::dumpInsts() const
{
- std::list<unsigned>::iterator threads = activeThreads->begin();
- std::list<unsigned>::iterator end = activeThreads->end();
+ list<ThreadID>::const_iterator threads = activeThreads->begin();
+ list<ThreadID>::const_iterator end = activeThreads->end();
while (threads != end) {
- unsigned tid = *threads++;
+ ThreadID tid = *threads++;
thread[tid].dumpInsts();
}
}
+
+template<class Impl>
+Fault
+LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, Request::Flags flags,
+ uint64_t *res, AtomicOpFunctorPtr amo_op,
+ const std::vector<bool>& byte_enable)
+{
+    // This incoming request can be either load, store or atomic.
+ // Atomic request has a corresponding pointer to its atomic memory
+ // operation
+ bool isAtomic M5_VAR_USED = !isLoad && amo_op;
+
+ ThreadID tid = cpu->contextToThread(inst->contextId());
+ auto cacheLineSize = cpu->cacheLineSize();
+ bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
+ LSQRequest* req = nullptr;
+
+ // Atomic requests that access data across cache line boundary are
+ // currently not allowed since the cache does not guarantee corresponding
+ // atomic memory operations to be executed atomically across a cache line.
+ // For ISAs such as x86 that supports cross-cache-line atomic instructions,
+ // the cache needs to be modified to perform atomic update to both cache
+ // lines. For now, such cross-line update is not supported.
+ assert(!isAtomic || (isAtomic && !needs_burst));
+
+ const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
+
+ if (inst->translationStarted()) {
+ req = inst->savedReq;
+ assert(req);
+ } else {
+ if (htm_cmd) {
+ assert(addr == 0x0lu);
+ assert(size == 8);
+ req = new HtmCmdRequest(&thread[tid], inst, flags);
+ } else if (needs_burst) {
+ req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
+ size, flags, data, res);
+ } else {
+ req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
+ size, flags, data, res, std::move(amo_op));
+ }
+ assert(req);
+ if (!byte_enable.empty()) {
+ req->_byteEnable = byte_enable;
+ }
+ inst->setRequest();
+ req->taskId(cpu->taskId());
+
+ // There might be fault from a previous execution attempt if this is
+ // a strictly ordered load
+ inst->getFault() = NoFault;
+
+ req->initiateTranslation();
+ }
+
+    /* This is the place where instructions get the effAddr. */
+ if (req->isTranslationComplete()) {
+ if (req->isMemAccessRequired()) {
+ inst->effAddr = req->getVaddr();
+ inst->effSize = size;
+ inst->effAddrValid(true);
+
+ if (cpu->checker) {
+ inst->reqToVerify = std::make_shared<Request>(*req->request());
+ }
+ Fault fault;
+ if (isLoad)
+ fault = cpu->read(req, inst->lqIdx);
+ else
+ fault = cpu->write(req, data, inst->sqIdx);
+ // inst->getFault() may have the first-fault of a
+ // multi-access split request at this point.
+ // Overwrite that only if we got another type of fault
+ // (e.g. re-exec).
+ if (fault != NoFault)
+ inst->getFault() = fault;
+ } else if (isLoad) {
+ inst->setMemAccPredicate(false);
+ // Commit will have to clean up whatever happened. Set this
+ // instruction as executed.
+ inst->setExecuted();
+ }
+ }
+
+ if (inst->traceData)
+ inst->traceData->setMem(addr, size, flags);
+
+ return inst->getFault();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ _fault.push_back(fault);
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 1;
+    /* If the instruction has been squashed, let the request know
+     * as it may have to self-destruct. */
+ if (_inst->isSquashed()) {
+ this->squashTranslation();
+ } else {
+ _inst->strictlyOrdered(req->isStrictlyOrdered());
+
+ flags.set(Flag::TranslationFinished);
+ if (fault == NoFault) {
+ _inst->physEffAddr = req->getPaddr();
+ _inst->memReqFlags = req->getFlags();
+ if (req->isCondSwap()) {
+ assert(_res);
+ req->setExtraData(*_res);
+ }
+ setState(State::Request);
+ } else {
+ setState(State::Fault);
+ }
+
+ LSQRequest::_inst->fault = fault;
+ LSQRequest::_inst->translationCompleted(true);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ int i;
+ for (i = 0; i < _requests.size() && _requests[i] != req; i++);
+ assert(i < _requests.size());
+ _fault[i] = fault;
+
+ numInTranslationFragments--;
+ numTranslatedFragments++;
+
+ if (fault == NoFault)
+ mainReq->setFlags(req->getFlags());
+
+ if (numTranslatedFragments == _requests.size()) {
+ if (_inst->isSquashed()) {
+ this->squashTranslation();
+ } else {
+ _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
+ flags.set(Flag::TranslationFinished);
+ _inst->translationCompleted(true);
+
+ for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
+ if (i > 0) {
+ _inst->physEffAddr = request(0)->getPaddr();
+ _inst->memReqFlags = mainReq->getFlags();
+ if (mainReq->isCondSwap()) {
+ assert (i == _fault.size());
+ assert(_res);
+ mainReq->setExtraData(*_res);
+ }
+ if (i == _fault.size()) {
+ _inst->fault = NoFault;
+ setState(State::Request);
+ } else {
+ _inst->fault = _fault[i];
+ setState(State::PartialFault);
+ }
+ } else {
+ _inst->fault = _fault[0];
+ setState(State::Fault);
+ }
+ }
+
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::initiateTranslation()
+{
+ assert(_requests.size() == 0);
+
+ this->addRequest(_addr, _size, _byteEnable);
+
+ if (_requests.size() > 0) {
+ _requests.back()->setReqInstSeqNum(_inst->seqNum);
+ _requests.back()->taskId(_taskId);
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+
+ _inst->savedReq = this;
+ sendFragmentToTranslation(0);
+ } else {
+ _inst->setMemAccPredicate(false);
+ }
+}
+
+template<class Impl>
+PacketPtr
+LSQ<Impl>::SplitDataRequest::mainPacket()
+{
+ return _mainPacket;
+}
+
+template<class Impl>
+RequestPtr
+LSQ<Impl>::SplitDataRequest::mainRequest()
+{
+ return mainReq;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::initiateTranslation()
+{
+ auto cacheLineSize = _port.cacheLineSize();
+ Addr base_addr = _addr;
+ Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
+ Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
+ uint32_t size_so_far = 0;
+
+ mainReq = std::make_shared<Request>(base_addr,
+ _size, _flags, _inst->requestorId(),
+ _inst->instAddr(), _inst->contextId());
+ if (!_byteEnable.empty()) {
+ mainReq->setByteEnable(_byteEnable);
+ }
+
+ // Paddr is not used in mainReq. However, we will accumulate the flags
+ // from the sub requests into mainReq by calling setFlags() in finish().
+ // setFlags() assumes that paddr is set so flip the paddr valid bit here to
+ // avoid a potential assert in setFlags() when we call it from finish().
+ mainReq->setPaddr(0);
+
+ /* Get the pre-fix, possibly unaligned. */
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin();
+ auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+ this->addRequest(base_addr, next_addr - base_addr,
+ std::vector<bool>(it_start, it_end));
+ }
+ size_so_far = next_addr - base_addr;
+
+ /* We are block aligned now, reading whole blocks. */
+ base_addr = next_addr;
+ while (base_addr != final_addr) {
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, cacheLineSize, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+ this->addRequest(base_addr, cacheLineSize,
+ std::vector<bool>(it_start, it_end));
+ }
+ size_so_far += cacheLineSize;
+ base_addr += cacheLineSize;
+ }
+
+ /* Deal with the tail. */
+ if (size_so_far < _size) {
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, _size - size_so_far, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.end();
+ this->addRequest(base_addr, _size - size_so_far,
+ std::vector<bool>(it_start, it_end));
+ }
+ }
+
+ if (_requests.size() > 0) {
+ /* Setup the requests and send them to translation. */
+ for (auto& r: _requests) {
+ r->setReqInstSeqNum(_inst->seqNum);
+ r->taskId(_taskId);
+ }
+
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+ this->_inst->savedReq = this;
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 0;
+ _fault.resize(_requests.size());
+
+ for (uint32_t i = 0; i < _requests.size(); i++) {
+ sendFragmentToTranslation(i);
+ }
+ } else {
+ _inst->setMemAccPredicate(false);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
+{
+ numInTranslationFragments++;
+ _port.dTLB()->translateTiming(
+ this->request(i),
+ this->_inst->thread->getTC(), this,
+ this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
+{
+ assert(_numOutstandingPackets == 1);
+ auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ flags.set(Flag::Complete);
+ state->outstanding--;
+ assert(pkt == _packets.front());
+ _port.completeDataAccess(pkt);
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
+{
+ auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ uint32_t pktIdx = 0;
+ while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
+ pktIdx++;
+ assert(pktIdx < _packets.size());
+ numReceivedPackets++;
+ state->outstanding--;
+ if (numReceivedPackets == _packets.size()) {
+ flags.set(Flag::Complete);
+ /* Assemble packets. */
+ PacketPtr resp = isLoad()
+ ? Packet::createRead(mainReq)
+ : Packet::createWrite(mainReq);
+ if (isLoad())
+ resp->dataStatic(_inst->memData);
+ else
+ resp->dataStatic(_data);
+ resp->senderState = _senderState;
+ _port.completeDataAccess(resp);
+ delete resp;
+ }
+ return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::buildPackets()
+{
+ assert(_senderState);
+ /* Retries do not create new packets. */
+ if (_packets.size() == 0) {
+ _packets.push_back(
+ isLoad()
+ ? Packet::createRead(request())
+ : Packet::createWrite(request()));
+ _packets.back()->dataStatic(_inst->memData);
+ _packets.back()->senderState = _senderState;
+
+ // hardware transactional memory
+ // If request originates in a transaction (not necessarily a HtmCmd),
+ // then the packet should be marked as such.
+ if (_inst->inHtmTransactionalState()) {
+ _packets.back()->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+
+ DPRINTF(HtmCpu,
+ "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
+ isLoad() ? "LD" : "ST",
+ _inst->instAddr(),
+ _packets.back()->req->hasVaddr() ?
+ _packets.back()->req->getVaddr() : 0lu,
+ _packets.back()->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
+ }
+ assert(_packets.size() == 1);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::buildPackets()
+{
+ // Build the packets for a split access: an optional "main" packet
+ // spanning the whole access (loads only) plus one fragment packet per
+ // successfully translated sub-request. Retries do not create new
+ // packets: a non-empty packet vector is reused untouched.
+ // Fragment offsets into _inst->memData are measured from the access's
+ // base virtual address.
+ Addr base_address = _addr;
+
+ if (_packets.size() == 0) {
+ // First attempt: create the packets.
+ if (isLoad()) {
+ // The main packet is not itself sent to memory (only _packets
+ // are, see SplitDataRequest::sendPacketToCache); it exposes
+ // _inst->memData, which the fragments below fill in piecewise.
+ _mainPacket = Packet::createRead(mainReq);
+ _mainPacket->dataStatic(_inst->memData);
+
+ // hardware transactional memory
+ // If request originates in a transaction,
+ // packet should be marked as such
+ if (_inst->inHtmTransactionalState()) {
+ _mainPacket->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu,
+ "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+ _inst->instAddr(),
+ _mainPacket->req->hasVaddr() ?
+ _mainPacket->req->getVaddr() : 0lu,
+ _mainPacket->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
+ }
+ // Only fragments that translated without fault get packets; the
+ // loop stops at the first faulting fragment (partial-fault case).
+ for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
+ RequestPtr r = _requests[i];
+ PacketPtr pkt = isLoad() ? Packet::createRead(r)
+ : Packet::createWrite(r);
+ ptrdiff_t offset = r->getVaddr() - base_address;
+ if (isLoad()) {
+ // Loads deposit straight into the instruction's buffer.
+ pkt->dataStatic(_inst->memData + offset);
+ } else {
+ // Stores carry a private copy of their slice; the packet
+ // takes ownership of the buffer (dataDynamic).
+ uint8_t* req_data = new uint8_t[r->getSize()];
+ std::memcpy(req_data,
+ _inst->memData + offset,
+ r->getSize());
+ pkt->dataDynamic(req_data);
+ }
+ pkt->senderState = _senderState;
+ _packets.push_back(pkt);
+
+ // hardware transactional memory
+ // If request originates in a transaction,
+ // packet should be marked as such
+ if (_inst->inHtmTransactionalState()) {
+ _packets.back()->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu,
+ "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+ isLoad() ? "LD" : "ST",
+ i+1,
+ _inst->instAddr(),
+ _packets.back()->req->hasVaddr() ?
+ _packets.back()->req->getVaddr() : 0lu,
+ _packets.back()->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
+ }
+ }
+ assert(_packets.size() > 0);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::sendPacketToCache()
+{
+    // A single request has exactly one packet; issue it if the cache port
+    // accepts it, otherwise leave it queued for a later retry.
+    assert(_numOutstandingPackets == 0);
+    PacketPtr pkt = _packets.at(0);
+    if (lsqUnit()->trySendPacket(isLoad(), pkt))
+        _numOutstandingPackets = 1;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::sendPacketToCache()
+{
+    // Issue as many of the remaining fragment packets as the cache will
+    // accept, in order; stop at the first rejection and retry later.
+    const size_t total = _packets.size();
+    while (numReceivedPackets + _numOutstandingPackets < total) {
+        PacketPtr next =
+            _packets.at(numReceivedPackets + _numOutstandingPackets);
+        if (!lsqUnit()->trySendPacket(isLoad(), next))
+            break;
+        _numOutstandingPackets++;
+    }
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SingleDataRequest::handleLocalAccess(
+    ThreadContext *thread, PacketPtr pkt)
+{
+    // Delegate to the request's local accessor and report its latency.
+    const auto &req = pkt->req;
+    return req->localAccessor(thread, pkt);
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SplitDataRequest::handleLocalAccess(
+    ThreadContext *thread, PacketPtr mainPkt)
+{
+    // Replay the access fragment by fragment: each sub-request gets a
+    // transient packet viewing its slice of the main packet's data. The
+    // overall latency is that of the slowest fragment.
+    Cycles max_delay(0);
+    unsigned data_offset = 0;
+
+    for (const auto &req : _requests) {
+        PacketPtr frag_pkt =
+            new Packet(req, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
+        frag_pkt->dataStatic(mainPkt->getPtr<uint8_t>() + data_offset);
+        max_delay = std::max(max_delay,
+                             req->localAccessor(thread, frag_pkt));
+        data_offset += req->getSize();
+        delete frag_pkt;
+    }
+    return max_delay;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+    // The single request overlaps the snooped block iff its masked
+    // physical address names that block.
+    const Addr line_addr = LSQRequest::_requests[0]->getPaddr() & blockMask;
+    return line_addr == blockAddr;
+}
+
+/**
+ * Caches may probe into the load-store queue to enforce memory ordering
+ * guarantees. This method supports probes by providing a mechanism to compare
+ * snoop messages with requests tracked by the load-store queue.
+ *
+ * Consistency models must enforce ordering constraints. TSO, for instance,
+ * must prevent all memory reorderings except one: a store may be reordered
+ * after a later load. The reordering restrictions negatively impact performance by
+ * cutting down on memory level parallelism. However, the core can regain
+ * performance by generating speculative loads. Speculative loads may issue
+ * without affecting correctness if precautions are taken to handle invalid
+ * memory orders. The load queue must squash under memory model violations.
+ * Memory model violations may occur when block ownership is granted to
+ * another core or the block cannot be accurately monitored by the load queue.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+    /**
+     * Partial faults complicate this check: fragments whose translation
+     * is still outstanding have no valid physical address yet, so only
+     * fragments that already hold a paddr can be compared against the
+     * snooped block. A fragment without a paddr is assumed not to
+     * require a pipeline squash.
+     */
+    for (const auto &req : _requests) {
+        if (req->hasPaddr() && (req->getPaddr() & blockMask) == blockAddr)
+            return true;
+    }
+    return false;
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
+{
+ // Thin forwarding shim: the owning LSQ performs the actual response
+ // handling and reports whether the packet was accepted.
+ return lsq->recvTimingResp(pkt);
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
+{
+    // A snoop may satisfy an address monitor armed by a thread; wake every
+    // thread whose monitor matches before the LSQ processes the snoop.
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        auto *monitor = cpu->getCpuAddrMonitor(tid);
+        if (monitor->doMonitor(pkt))
+            cpu->wakeup(tid);
+    }
+    lsq->recvTimingSnoopReq(pkt);
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvReqRetry()
+{
+ // The cache signalled readiness again; let the LSQ re-issue any
+ // packets that were previously rejected.
+ lsq->recvReqRetry();
+}
+
+// HTM commands are modelled as 8-byte loads to a dummy virtual/physical
+// address of 0x0 and bypass address translation entirely (see
+// HtmCmdRequest::initiateTranslation()).
+template<class Impl>
+LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
+ const DynInstPtr& inst,
+ const Request::Flags& flags_) :
+ SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
+ nullptr, nullptr, nullptr)
+{
+ assert(_requests.size() == 0);
+
+ this->addRequest(_addr, _size, _byteEnable);
+
+ if (_requests.size() > 0) {
+ _requests.back()->setReqInstSeqNum(_inst->seqNum);
+ _requests.back()->taskId(_taskId);
+ // Translation is skipped for HTM commands, so the (dummy) physical
+ // address must be installed here.
+ _requests.back()->setPaddr(_addr);
+ _requests.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
+
+ // Mirror the request's properties into the instruction, as a
+ // normal translation-finished callback would do.
+ _inst->strictlyOrdered(_requests.back()->isStrictlyOrdered());
+ _inst->fault = NoFault;
+ _inst->physEffAddr = _requests.back()->getPaddr();
+ _inst->memReqFlags = _requests.back()->getFlags();
+ _inst->savedReq = this;
+
+ setState(State::Translation);
+ } else {
+ // NOTE(review): addRequest() is always expected to create a request
+ // here — confirm against LSQRequest::addRequest's masking logic.
+ panic("unexpected behaviour");
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::initiateTranslation()
+{
+    // Transaction commands are implemented as loads to avoid significant
+    // changes to the cpu and memory interfaces. The virtual and physical
+    // address uses a dummy value of 0x00, so no real address translation
+    // occurs: mark translation as started and finished immediately and
+    // move straight to the Request state.
+    _inst->translationStarted(true);
+    _inst->translationCompleted(true);
+
+    flags.set(Flag::TranslationStarted);
+    flags.set(Flag::TranslationFinished);
+
+    setState(State::Request);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ // HtmCmdRequest never starts a TLB walk (initiateTranslation() marks
+ // translation finished immediately), so this translation-finished
+ // callback must never fire.
+ panic("unexpected behaviour");
+}
+
+#endif//__CPU_O3_LSQ_IMPL_HH__