/*
- * Copyright (c) 2011-2012, 2014 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2017-2019 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Korey Sewell
*/
#ifndef __CPU_O3_LSQ_IMPL_HH__
#include <list>
#include <string>
+#include "base/logging.hh"
+#include "cpu/o3/cpu.hh"
#include "cpu/o3/lsq.hh"
#include "debug/Drain.hh"
#include "debug/Fetch.hh"
+#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
: cpu(cpu_ptr), iewStage(iew_ptr),
+ _cacheBlocked(false),
+ cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
+ cacheLoadPorts(params->cacheLoadPorts), usedLoadPorts(0),
+ lsqPolicy(params->smtLSQPolicy),
LQEntries(params->LQEntries),
SQEntries(params->SQEntries),
+ maxLQEntries(maxLSQAllocation(lsqPolicy, LQEntries, params->numThreads,
+ params->smtLSQThreshold)),
+ maxSQEntries(maxLSQAllocation(lsqPolicy, SQEntries, params->numThreads,
+ params->smtLSQThreshold)),
+ dcachePort(this, cpu_ptr),
numThreads(params->numThreads)
{
assert(numThreads > 0 && numThreads <= Impl::MaxThreads);
//**********************************************/
//************ Handle SMT Parameters ***********/
//**********************************************/
- std::string policy = params->smtLSQPolicy;
-
- //Convert string to lowercase
- std::transform(policy.begin(), policy.end(), policy.begin(),
- (int(*)(int)) tolower);
-
- //Figure out fetch policy
- if (policy == "dynamic") {
- lsqPolicy = Dynamic;
-
- maxLQEntries = LQEntries;
- maxSQEntries = SQEntries;
+ /* Run SMT policy checks. */
+ if (lsqPolicy == SMTQueuePolicy::Dynamic) {
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
- } else if (policy == "partitioned") {
- lsqPolicy = Partitioned;
-
- //@todo:make work if part_amt doesnt divide evenly.
- maxLQEntries = LQEntries / numThreads;
- maxSQEntries = SQEntries / numThreads;
-
+ } else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
"%i entries per LQ | %i entries per SQ\n",
maxLQEntries,maxSQEntries);
- } else if (policy == "threshold") {
- lsqPolicy = Threshold;
+ } else if (lsqPolicy == SMTQueuePolicy::Threshold) {
- assert(params->smtLSQThreshold > LQEntries);
- assert(params->smtLSQThreshold > SQEntries);
-
- //Divide up by threshold amount
- //@todo: Should threads check the max and the total
- //amount of the LSQ
- maxLQEntries = params->smtLSQThreshold;
- maxSQEntries = params->smtLSQThreshold;
+ assert(params->smtLSQThreshold > params->LQEntries);
+ assert(params->smtLSQThreshold > params->SQEntries);
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
"%i entries per LQ | %i entries per SQ\n",
maxLQEntries,maxSQEntries);
} else {
- assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
- "Partitioned, Threshold}");
+ panic("Invalid LSQ sharing policy. Options are: Dynamic, "
+ "Partitioned, Threshold");
}
- //Initialize LSQs
- thread = new LSQUnit[numThreads];
+ thread.reserve(numThreads);
for (ThreadID tid = 0; tid < numThreads; tid++) {
- thread[tid].init(cpu, iew_ptr, params, this,
- maxLQEntries, maxSQEntries, tid);
- thread[tid].setDcachePort(&cpu_ptr->getDataPort());
+ thread.emplace_back(maxLQEntries, maxSQEntries);
+ thread[tid].init(cpu, iew_ptr, params, this, tid);
+ thread[tid].setDcachePort(&dcachePort);
}
}
return iewStage->name() + ".lsq";
}
-template<class Impl>
-void
-LSQ<Impl>::regStats()
-{
- //Initialize LSQs
- for (ThreadID tid = 0; tid < numThreads; tid++) {
- thread[tid].regStats();
- }
-}
-
template<class Impl>
void
LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
void
LSQ<Impl>::takeOverFrom()
{
+ usedStorePorts = 0;
+ _cacheBlocked = false;
+
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread[tid].takeOverFrom();
}
}
-template <class Impl>
-int
-LSQ<Impl>::entryAmount(ThreadID num_threads)
-{
- if (lsqPolicy == Partitioned) {
- return LQEntries / num_threads;
- } else {
- return 0;
- }
-}
-
template <class Impl>
void
-LSQ<Impl>::resetEntries()
+LSQ<Impl>::tick()
{
- if (lsqPolicy != Dynamic || numThreads > 1) {
- int active_threads = activeThreads->size();
-
- int maxEntries;
+ // Re-issue loads which got blocked on the per-cycle load ports limit.
+ if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked)
+ iewStage->cacheUnblocked();
- if (lsqPolicy == Partitioned) {
- maxEntries = LQEntries / active_threads;
- } else if (lsqPolicy == Threshold && active_threads == 1) {
- maxEntries = LQEntries;
- } else {
- maxEntries = LQEntries;
- }
-
- list<ThreadID>::iterator threads = activeThreads->begin();
- list<ThreadID>::iterator end = activeThreads->end();
-
- while (threads != end) {
- ThreadID tid = *threads++;
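+ // Cache port usage is tracked per cycle; start the new cycle fresh.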
+ usedLoadPorts = 0;
+ usedStorePorts = 0;
+}
- resizeEntries(maxEntries, tid);
- }
- }
+template<class Impl>
+bool
+LSQ<Impl>::cacheBlocked() const
+{
+ return _cacheBlocked;
}
template<class Impl>
void
-LSQ<Impl>::removeEntries(ThreadID tid)
+LSQ<Impl>::cacheBlocked(bool v)
{
- thread[tid].clearLQ();
- thread[tid].clearSQ();
+ _cacheBlocked = v;
}
template<class Impl>
-void
-LSQ<Impl>::resizeEntries(unsigned size, ThreadID tid)
+bool
+LSQ<Impl>::cachePortAvailable(bool is_load) const
{
- thread[tid].resizeLQ(size);
- thread[tid].resizeSQ(size);
+ bool ret;
+ if (is_load) {
+ ret = usedLoadPorts < cacheLoadPorts;
+ } else {
+ ret = usedStorePorts < cacheStorePorts;
+ }
+ return ret;
}
template<class Impl>
void
-LSQ<Impl>::tick()
+LSQ<Impl>::cachePortBusy(bool is_load)
{
- list<ThreadID>::iterator threads = activeThreads->begin();
- list<ThreadID>::iterator end = activeThreads->end();
-
- while (threads != end) {
- ThreadID tid = *threads++;
-
- thread[tid].tick();
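+ // Claim one D-cache port for this cycle; callers must have checked
+ // cachePortAvailable() first, as the assert below enforces.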
+ assert(cachePortAvailable(is_load));
+ if (is_load) {
+ usedLoadPorts++;
+ } else {
+ usedStorePorts++;
}
}
LSQ<Impl>::recvReqRetry()
{
iewStage->cacheUnblocked();
+ cacheBlocked(false);
for (ThreadID tid : *activeThreads) {
thread[tid].recvRetry();
}
}
+template <class Impl>
+void
+LSQ<Impl>::completeDataAccess(PacketPtr pkt)
+{
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ thread[cpu->contextToThread(senderState->contextId())]
+ .completeDataAccess(pkt);
+}
+
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
pkt->getAddr());
- thread[cpu->contextToThread(pkt->req->contextId())]
- .completeDataAccess(pkt);
+ auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ panic_if(!senderState, "Got packet back with unknown sender state\n");
+
+ thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
if (pkt->isInvalidate()) {
// This response also contains an invalidate; e.g. this can be the case
thread[tid].checkSnoop(pkt);
}
}
+ // Update the LSQRequest state (this may delete the request)
+ senderState->request()->packetReplied();
- delete pkt;
return true;
}
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
- if (lsqPolicy == Dynamic)
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return isFull();
else
return thread[tid].lqFull() || thread[tid].sqFull();
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
- if (lsqPolicy == Dynamic)
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return lqFull();
else
return thread[tid].lqFull();
{
//@todo: Change to Calculate All Entries for
//Dynamic Policy
- if (lsqPolicy == Dynamic)
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return sqFull();
else
return thread[tid].sqFull();
bool
LSQ<Impl>::isStalled(ThreadID tid)
{
- if (lsqPolicy == Dynamic)
+ if (lsqPolicy == SMTQueuePolicy::Dynamic)
return isStalled();
else
return thread[tid].isStalled();
}
}
+template<class Impl>
+Fault
+LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, Request::Flags flags,
+ uint64_t *res, AtomicOpFunctorPtr amo_op,
+ const std::vector<bool>& byte_enable)
+{
+ // This incoming request can be either a load, a store or an atomic.
+ // An atomic request has a corresponding pointer to its atomic memory
+ // operation.
+ bool isAtomic M5_VAR_USED = !isLoad && amo_op;
+
+ ThreadID tid = cpu->contextToThread(inst->contextId());
+ auto cacheLineSize = cpu->cacheLineSize();
+ bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
+ LSQRequest* req = nullptr;
+
+ // Atomic requests that access data across cache line boundary are
+ // currently not allowed since the cache does not guarantee corresponding
+ // atomic memory operations to be executed atomically across a cache line.
+ // For ISAs such as x86 that support cross-cache-line atomic instructions,
+ // the cache needs to be modified to perform atomic update to both cache
+ // lines. For now, such cross-line update is not supported.
+ assert(!isAtomic || (isAtomic && !needs_burst));
+
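+ // HTM commands are issued as loads carrying the Request::HTM_CMD flag
+ // and are handled by HtmCmdRequest, which bypasses address translation.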
+ const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
+
+ if (inst->translationStarted()) {
+ req = inst->savedReq;
+ assert(req);
+ } else {
+ if (htm_cmd) {
+ assert(addr == 0x0lu);
+ assert(size == 8);
+ req = new HtmCmdRequest(&thread[tid], inst, flags);
+ } else if (needs_burst) {
+ req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
+ size, flags, data, res);
+ } else {
+ req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
+ size, flags, data, res, std::move(amo_op));
+ }
+ assert(req);
+ if (!byte_enable.empty()) {
+ req->_byteEnable = byte_enable;
+ }
+ inst->setRequest();
+ req->taskId(cpu->taskId());
+
+ // There might be fault from a previous execution attempt if this is
+ // a strictly ordered load
+ inst->getFault() = NoFault;
+
+ req->initiateTranslation();
+ }
+
+ /* This is the place where instructions get the effAddr. */
+ if (req->isTranslationComplete()) {
+ if (req->isMemAccessRequired()) {
+ inst->effAddr = req->getVaddr();
+ inst->effSize = size;
+ inst->effAddrValid(true);
+
+ if (cpu->checker) {
+ inst->reqToVerify = std::make_shared<Request>(*req->request());
+ }
+ Fault fault;
+ if (isLoad)
+ fault = cpu->read(req, inst->lqIdx);
+ else
+ fault = cpu->write(req, data, inst->sqIdx);
+ // inst->getFault() may have the first-fault of a
+ // multi-access split request at this point.
+ // Overwrite that only if we got another type of fault
+ // (e.g. re-exec).
+ if (fault != NoFault)
+ inst->getFault() = fault;
+ } else if (isLoad) {
+ inst->setMemAccPredicate(false);
+ // Commit will have to clean up whatever happened. Set this
+ // instruction as executed.
+ inst->setExecuted();
+ }
+ }
+
+ if (inst->traceData)
+ inst->traceData->setMem(addr, size, flags);
+
+ return inst->getFault();
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ _fault.push_back(fault);
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 1;
+ /* If the instruction has been squashed, let the request know
+ * as it may have to self-destruct. */
+ if (_inst->isSquashed()) {
+ this->squashTranslation();
+ } else {
+ _inst->strictlyOrdered(req->isStrictlyOrdered());
+
+ flags.set(Flag::TranslationFinished);
+ if (fault == NoFault) {
+ _inst->physEffAddr = req->getPaddr();
+ _inst->memReqFlags = req->getFlags();
+ if (req->isCondSwap()) {
+ assert(_res);
+ req->setExtraData(*_res);
+ }
+ setState(State::Request);
+ } else {
+ setState(State::Fault);
+ }
+
+ LSQRequest::_inst->fault = fault;
+ LSQRequest::_inst->translationCompleted(true);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ int i;
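+ // Locate the fragment whose translation has just finished.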
+ for (i = 0; i < _requests.size() && _requests[i] != req; i++);
+ assert(i < _requests.size());
+ _fault[i] = fault;
+
+ numInTranslationFragments--;
+ numTranslatedFragments++;
+
+ if (fault == NoFault)
+ mainReq->setFlags(req->getFlags());
+
+ if (numTranslatedFragments == _requests.size()) {
+ if (_inst->isSquashed()) {
+ this->squashTranslation();
+ } else {
+ _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
+ flags.set(Flag::TranslationFinished);
+ _inst->translationCompleted(true);
+
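+ // Find the first faulting fragment, if any; i == _fault.size()
+ // means every fragment translated successfully.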
+ for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++);
+ if (i > 0) {
+ _inst->physEffAddr = request(0)->getPaddr();
+ _inst->memReqFlags = mainReq->getFlags();
+ if (mainReq->isCondSwap()) {
+ assert (i == _fault.size());
+ assert(_res);
+ mainReq->setExtraData(*_res);
+ }
+ if (i == _fault.size()) {
+ _inst->fault = NoFault;
+ setState(State::Request);
+ } else {
+ _inst->fault = _fault[i];
+ setState(State::PartialFault);
+ }
+ } else {
+ _inst->fault = _fault[0];
+ setState(State::Fault);
+ }
+ }
+
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::initiateTranslation()
+{
+ assert(_requests.size() == 0);
+
+ this->addRequest(_addr, _size, _byteEnable);
+
+ if (_requests.size() > 0) {
+ _requests.back()->setReqInstSeqNum(_inst->seqNum);
+ _requests.back()->taskId(_taskId);
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+
+ _inst->savedReq = this;
+ sendFragmentToTranslation(0);
+ } else {
+ _inst->setMemAccPredicate(false);
+ }
+}
+
+template<class Impl>
+PacketPtr
+LSQ<Impl>::SplitDataRequest::mainPacket()
+{
+ return _mainPacket;
+}
+
+template<class Impl>
+RequestPtr
+LSQ<Impl>::SplitDataRequest::mainRequest()
+{
+ return mainReq;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::initiateTranslation()
+{
+ auto cacheLineSize = _port.cacheLineSize();
+ Addr base_addr = _addr;
+ Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
+ Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
+ uint32_t size_so_far = 0;
+
+ mainReq = std::make_shared<Request>(base_addr,
+ _size, _flags, _inst->requestorId(),
+ _inst->instAddr(), _inst->contextId());
+ if (!_byteEnable.empty()) {
+ mainReq->setByteEnable(_byteEnable);
+ }
+
+ // Paddr is not used in mainReq. However, we will accumulate the flags
+ // from the sub requests into mainReq by calling setFlags() in finish().
+ // setFlags() assumes that paddr is set so flip the paddr valid bit here to
+ // avoid a potential assert in setFlags() when we call it from finish().
+ mainReq->setPaddr(0);
+
+ /* Get the pre-fix, possibly unaligned. */
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, next_addr - base_addr, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin();
+ auto it_end = _byteEnable.begin() + (next_addr - base_addr);
+ this->addRequest(base_addr, next_addr - base_addr,
+ std::vector<bool>(it_start, it_end));
+ }
+ size_so_far = next_addr - base_addr;
+
+ /* We are block aligned now, reading whole blocks. */
+ base_addr = next_addr;
+ while (base_addr != final_addr) {
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, cacheLineSize, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize;
+ this->addRequest(base_addr, cacheLineSize,
+ std::vector<bool>(it_start, it_end));
+ }
+ size_so_far += cacheLineSize;
+ base_addr += cacheLineSize;
+ }
+
+ /* Deal with the tail. */
+ if (size_so_far < _size) {
+ if (_byteEnable.empty()) {
+ this->addRequest(base_addr, _size - size_so_far, _byteEnable);
+ } else {
+ auto it_start = _byteEnable.begin() + size_so_far;
+ auto it_end = _byteEnable.end();
+ this->addRequest(base_addr, _size - size_so_far,
+ std::vector<bool>(it_start, it_end));
+ }
+ }
+
+ if (_requests.size() > 0) {
+ /* Setup the requests and send them to translation. */
+ for (auto& r: _requests) {
+ r->setReqInstSeqNum(_inst->seqNum);
+ r->taskId(_taskId);
+ }
+
+ _inst->translationStarted(true);
+ setState(State::Translation);
+ flags.set(Flag::TranslationStarted);
+ this->_inst->savedReq = this;
+ numInTranslationFragments = 0;
+ numTranslatedFragments = 0;
+ _fault.resize(_requests.size());
+
+ for (uint32_t i = 0; i < _requests.size(); i++) {
+ sendFragmentToTranslation(i);
+ }
+ } else {
+ _inst->setMemAccPredicate(false);
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
+{
+ numInTranslationFragments++;
+ _port.dTLB()->translateTiming(
+ this->request(i),
+ this->_inst->thread->getTC(), this,
+ this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
+{
+ assert(_numOutstandingPackets == 1);
+ auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ flags.set(Flag::Complete);
+ state->outstanding--;
+ assert(pkt == _packets.front());
+ _port.completeDataAccess(pkt);
+ return true;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
+{
+ auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
+ uint32_t pktIdx = 0;
+ while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
+ pktIdx++;
+ assert(pktIdx < _packets.size());
+ numReceivedPackets++;
+ state->outstanding--;
+ if (numReceivedPackets == _packets.size()) {
+ flags.set(Flag::Complete);
+ /* Assemble packets. */
+ PacketPtr resp = isLoad()
+ ? Packet::createRead(mainReq)
+ : Packet::createWrite(mainReq);
+ if (isLoad())
+ resp->dataStatic(_inst->memData);
+ else
+ resp->dataStatic(_data);
+ resp->senderState = _senderState;
+ _port.completeDataAccess(resp);
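+ // The assembled packet only signals completion to the LSQ unit; the
+ // fragment packets carry the actual responses, so free it right away.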
+ delete resp;
+ }
+ return true;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::buildPackets()
+{
+ assert(_senderState);
+ /* Retries do not create new packets. */
+ if (_packets.size() == 0) {
+ _packets.push_back(
+ isLoad()
+ ? Packet::createRead(request())
+ : Packet::createWrite(request()));
+ _packets.back()->dataStatic(_inst->memData);
+ _packets.back()->senderState = _senderState;
+
+ // hardware transactional memory
+ // If request originates in a transaction (not necessarily a HtmCmd),
+ // then the packet should be marked as such.
+ if (_inst->inHtmTransactionalState()) {
+ _packets.back()->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+
+ DPRINTF(HtmCpu,
+ "HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
+ isLoad() ? "LD" : "ST",
+ _inst->instAddr(),
+ _packets.back()->req->hasVaddr() ?
+ _packets.back()->req->getVaddr() : 0lu,
+ _packets.back()->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
+ }
+ assert(_packets.size() == 1);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::buildPackets()
+{
+ /* Extra data?? */
+ Addr base_address = _addr;
+
+ if (_packets.size() == 0) {
+ /* First time here: build the fragment packets. */
+ if (isLoad()) {
+ _mainPacket = Packet::createRead(mainReq);
+ _mainPacket->dataStatic(_inst->memData);
+
+ // hardware transactional memory
+ // If request originates in a transaction,
+ // packet should be marked as such
+ if (_inst->inHtmTransactionalState()) {
+ _mainPacket->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu,
+ "HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+ _inst->instAddr(),
+ _mainPacket->req->hasVaddr() ?
+ _mainPacket->req->getVaddr() : 0lu,
+ _mainPacket->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
+ }
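+ // Build one packet per fragment, stopping at the first faulting one.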
+ for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
+ RequestPtr r = _requests[i];
+ PacketPtr pkt = isLoad() ? Packet::createRead(r)
+ : Packet::createWrite(r);
+ ptrdiff_t offset = r->getVaddr() - base_address;
+ if (isLoad()) {
+ pkt->dataStatic(_inst->memData + offset);
+ } else {
+ uint8_t* req_data = new uint8_t[r->getSize()];
+ std::memcpy(req_data,
+ _inst->memData + offset,
+ r->getSize());
+ pkt->dataDynamic(req_data);
+ }
+ pkt->senderState = _senderState;
+ _packets.push_back(pkt);
+
+ // hardware transactional memory
+ // If request originates in a transaction,
+ // packet should be marked as such
+ if (_inst->inHtmTransactionalState()) {
+ _packets.back()->setHtmTransactional(
+ _inst->getHtmTransactionUid());
+ DPRINTF(HtmCpu,
+ "HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
+ isLoad() ? "LD" : "ST",
+ i+1,
+ _inst->instAddr(),
+ _packets.back()->req->hasVaddr() ?
+ _packets.back()->req->getVaddr() : 0lu,
+ _packets.back()->getAddr(),
+ _inst->getHtmTransactionUid());
+ }
+ }
+ }
+ assert(_packets.size() > 0);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SingleDataRequest::sendPacketToCache()
+{
+ assert(_numOutstandingPackets == 0);
+ if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
+ _numOutstandingPackets = 1;
+}
+
+template<class Impl>
+void
+LSQ<Impl>::SplitDataRequest::sendPacketToCache()
+{
+ /* Try to send the packets. */
+ while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
+ lsqUnit()->trySendPacket(isLoad(),
+ _packets.at(numReceivedPackets + _numOutstandingPackets))) {
+ _numOutstandingPackets++;
+ }
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SingleDataRequest::handleLocalAccess(
+ ThreadContext *thread, PacketPtr pkt)
+{
+ return pkt->req->localAccessor(thread, pkt);
+}
+
+template<class Impl>
+Cycles
+LSQ<Impl>::SplitDataRequest::handleLocalAccess(
+ ThreadContext *thread, PacketPtr mainPkt)
+{
+ Cycles delay(0);
+ unsigned offset = 0;
+
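+ // Perform one local access per fragment and return the longest delay.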
+ for (auto r: _requests) {
+ PacketPtr pkt =
+ new Packet(r, isLoad() ? MemCmd::ReadReq : MemCmd::WriteReq);
+ pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
+ Cycles d = r->localAccessor(thread, pkt);
+ if (d > delay)
+ delay = d;
+ offset += r->getSize();
+ delete pkt;
+ }
+ return delay;
+}
+
+template<class Impl>
+bool
+LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+ return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
+}
+
+/**
+ * Caches may probe into the load-store queue to enforce memory ordering
+ * guarantees. This method supports probes by providing a mechanism to compare
+ * snoop messages with requests tracked by the load-store queue.
+ *
+ * Consistency models must enforce ordering constraints. TSO, for instance,
+ * must prevent memory reorderings except stores which are reordered after
+ * loads. The reordering restrictions negatively impact performance by
+ * cutting down on memory level parallelism. However, the core can regain
+ * performance by generating speculative loads. Speculative loads may issue
+ * without affecting correctness if precautions are taken to handle invalid
+ * memory orders. The load queue must squash under memory model violations.
+ * Memory model violations may occur when block ownership is granted to
+ * another core or the block cannot be accurately monitored by the load queue.
+ */
+template<class Impl>
+bool
+LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
+{
+ bool is_hit = false;
+ for (auto &r: _requests) {
+ /**
+ * The load-store queue handles partial faults which complicates this
+ * method. Physical addresses must be compared between requests and
+ * snoops. Some requests will not have a valid physical address, since
+ * partial faults may have outstanding translations. Therefore, the
+ * existence of a valid request address must be checked before
+ * comparing block hits. We assume no pipeline squash is needed if a
+ * valid request address does not exist.
+ */
+ if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) {
+ is_hit = true;
+ break;
+ }
+ }
+ return is_hit;
+}
+
+template <class Impl>
+bool
+LSQ<Impl>::DcachePort::recvTimingResp(PacketPtr pkt)
+{
+ return lsq->recvTimingResp(pkt);
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
+{
+ for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+ if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+ cpu->wakeup(tid);
+ }
+ }
+ lsq->recvTimingSnoopReq(pkt);
+}
+
+template <class Impl>
+void
+LSQ<Impl>::DcachePort::recvReqRetry()
+{
+ lsq->recvReqRetry();
+}
+
+template<class Impl>
+LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
+ const DynInstPtr& inst,
+ const Request::Flags& flags_) :
+ SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
+ nullptr, nullptr, nullptr)
+{
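+ // HTM commands bypass the TLB: the dummy address (0x0) serves as both
+ // virtual and physical address, so the request is fully set up here.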
+ assert(_requests.size() == 0);
+
+ this->addRequest(_addr, _size, _byteEnable);
+
+ if (_requests.size() > 0) {
+ _requests.back()->setReqInstSeqNum(_inst->seqNum);
+ _requests.back()->taskId(_taskId);
+ _requests.back()->setPaddr(_addr);
+ _requests.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
+
+ _inst->strictlyOrdered(_requests.back()->isStrictlyOrdered());
+ _inst->fault = NoFault;
+ _inst->physEffAddr = _requests.back()->getPaddr();
+ _inst->memReqFlags = _requests.back()->getFlags();
+ _inst->savedReq = this;
+
+ setState(State::Translation);
+ } else {
+ panic("unexpected behaviour");
+ }
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::initiateTranslation()
+{
+ // Transaction commands are implemented as loads to avoid significant
+ // changes to the cpu and memory interfaces
+ // The virtual and physical addresses use a dummy value of 0x0.
+ // Address translation does not really occur; the code below merely
+ // marks the translation as started and completed.
+
+ flags.set(Flag::TranslationStarted);
+ flags.set(Flag::TranslationFinished);
+
+ _inst->translationStarted(true);
+ _inst->translationCompleted(true);
+
+ setState(State::Request);
+}
+
+template<class Impl>
+void
+LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
+ ThreadContext* tc, BaseTLB::Mode mode)
+{
+ panic("unexpected behaviour");
+}
+
#endif//__CPU_O3_LSQ_IMPL_HH__