-    lsq->thread[lsq->retryTid].recvRetry();
Options are: Dynamic, " + "Partitioned, Threshold"); } - //Initialize LSQs - for (int tid=0; tid < numThreads; tid++) { - thread[tid].init(params, this, maxLQEntries, maxSQEntries, tid); + thread.reserve(numThreads); + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread.emplace_back(maxLQEntries, maxSQEntries); + thread[tid].init(cpu, iew_ptr, params, this, tid); thread[tid].setDcachePort(&dcachePort); } } @@ -170,165 +118,136 @@ LSQ::name() const template void -LSQ::regStats() -{ - //Initialize LSQs - for (int tid=0; tid < numThreads; tid++) { - thread[tid].regStats(); - } -} - -template -void -LSQ::setActiveThreads(std::list *at_ptr) +LSQ::setActiveThreads(list *at_ptr) { activeThreads = at_ptr; assert(activeThreads != 0); } -template +template void -LSQ::setCPU(O3CPU *cpu_ptr) +LSQ::drainSanityCheck() const { - cpu = cpu_ptr; + assert(isDrained()); - dcachePort.setName(name()); - - for (int tid=0; tid < numThreads; tid++) { - thread[tid].setCPU(cpu_ptr); - } + for (ThreadID tid = 0; tid < numThreads; tid++) + thread[tid].drainSanityCheck(); } -template -void -LSQ::setIEW(IEW *iew_ptr) +template +bool +LSQ::isDrained() const { - iewStage = iew_ptr; + bool drained(true); - for (int tid=0; tid < numThreads; tid++) { - thread[tid].setIEW(iew_ptr); + if (!lqEmpty()) { + DPRINTF(Drain, "Not drained, LQ not empty.\n"); + drained = false; } -} -template -void -LSQ::switchOut() -{ - for (int tid = 0; tid < numThreads; tid++) { - thread[tid].switchOut(); + if (!sqEmpty()) { + DPRINTF(Drain, "Not drained, SQ not empty.\n"); + drained = false; } + + return drained; } template void LSQ::takeOverFrom() { - for (int tid = 0; tid < numThreads; tid++) { - thread[tid].takeOverFrom(); - } -} + usedStorePorts = 0; + _cacheBlocked = false; -template -int -LSQ::entryAmount(int num_threads) -{ - if (lsqPolicy == Partitioned) { - return LQEntries / num_threads; - } else { - return 0; + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].takeOverFrom(); } } template void -LSQ::resetEntries() +LSQ::tick() { - if (lsqPolicy != Dynamic || numThreads > 1) { - int active_threads = activeThreads->size(); - - int maxEntries; + // Re-issue loads which got blocked on the per-cycle load ports limit. 
+ if (usedLoadPorts == cacheLoadPorts && !_cacheBlocked) + iewStage->cacheUnblocked(); - if (lsqPolicy == Partitioned) { - maxEntries = LQEntries / active_threads; - } else if (lsqPolicy == Threshold && active_threads == 1) { - maxEntries = LQEntries; - } else { - maxEntries = LQEntries; - } - - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - - while (threads != end) { - unsigned tid = *threads++; + usedLoadPorts = 0; + usedStorePorts = 0; +} - resizeEntries(maxEntries, tid); - } - } +template +bool +LSQ::cacheBlocked() const +{ + return _cacheBlocked; } template void -LSQ::removeEntries(unsigned tid) +LSQ::cacheBlocked(bool v) { - thread[tid].clearLQ(); - thread[tid].clearSQ(); + _cacheBlocked = v; } template -void -LSQ::resizeEntries(unsigned size,unsigned tid) +bool +LSQ::cachePortAvailable(bool is_load) const { - thread[tid].resizeLQ(size); - thread[tid].resizeSQ(size); + bool ret; + if (is_load) { + ret = usedLoadPorts < cacheLoadPorts; + } else { + ret = usedStorePorts < cacheStorePorts; + } + return ret; } template void -LSQ::tick() +LSQ::cachePortBusy(bool is_load) { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - - while (threads != end) { - unsigned tid = *threads++; - - thread[tid].tick(); + assert(cachePortAvailable(is_load)); + if (is_load) { + usedLoadPorts++; + } else { + usedStorePorts++; } } template void -LSQ::insertLoad(DynInstPtr &load_inst) +LSQ::insertLoad(const DynInstPtr &load_inst) { - unsigned tid = load_inst->threadNumber; + ThreadID tid = load_inst->threadNumber; thread[tid].insertLoad(load_inst); } template void -LSQ::insertStore(DynInstPtr &store_inst) +LSQ::insertStore(const DynInstPtr &store_inst) { - unsigned tid = store_inst->threadNumber; + ThreadID tid = store_inst->threadNumber; thread[tid].insertStore(store_inst); } template Fault -LSQ::executeLoad(DynInstPtr &inst) +LSQ::executeLoad(const DynInstPtr &inst) { - unsigned tid = inst->threadNumber; + ThreadID tid = inst->threadNumber; return thread[tid].executeLoad(inst); } template Fault -LSQ::executeStore(DynInstPtr &inst) +LSQ::executeStore(const DynInstPtr &inst) { - unsigned tid = inst->threadNumber; + ThreadID tid = inst->threadNumber; return thread[tid].executeStore(inst); } @@ -337,11 +256,11 @@ template void LSQ::writebackStores() { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; if (numStoresToWB(tid) > 0) { DPRINTF(Writeback,"[tid:%i] Writing back stores. 
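+    // All load ports were consumed this cycle while the cache itself was
+    // not blocked, so tell IEW it may retry: the port counters are reset
+    // just below, freeing the ports for the coming cycle.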
%i stores " @@ -357,11 +276,11 @@ bool LSQ::violation() { /* Answers: Does Anybody Have a Violation?*/ - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; if (thread[tid].violation()) return true; @@ -370,17 +289,93 @@ LSQ::violation() return false; } +template +void +LSQ::recvReqRetry() +{ + iewStage->cacheUnblocked(); + cacheBlocked(false); + + for (ThreadID tid : *activeThreads) { + thread[tid].recvRetry(); + } +} + +template +void +LSQ::completeDataAccess(PacketPtr pkt) +{ + auto senderState = dynamic_cast(pkt->senderState); + thread[cpu->contextToThread(senderState->contextId())] + .completeDataAccess(pkt); +} + +template +bool +LSQ::recvTimingResp(PacketPtr pkt) +{ + if (pkt->isError()) + DPRINTF(LSQ, "Got error packet back for address: %#X\n", + pkt->getAddr()); + + auto senderState = dynamic_cast(pkt->senderState); + panic_if(!senderState, "Got packet back with unknown sender state\n"); + + thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt); + + if (pkt->isInvalidate()) { + // This response also contains an invalidate; e.g. this can be the case + // if cmd is ReadRespWithInvalidate. + // + // The calling order between completeDataAccess and checkSnoop matters. + // By calling checkSnoop after completeDataAccess, we ensure that the + // fault set by checkSnoop is not lost. Calling writeback (more + // specifically inst->completeAcc) in completeDataAccess overwrites + // fault, and in case this instruction requires squashing (as + // determined by checkSnoop), the ReExec fault set by checkSnoop would + // be lost otherwise. 
+ + DPRINTF(LSQ, "received invalidation with response for addr:%#x\n", + pkt->getAddr()); + + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].checkSnoop(pkt); + } + } + // Update the LSQRequest state (this may delete the request) + senderState->request()->packetReplied(); + + return true; +} + +template +void +LSQ::recvTimingSnoopReq(PacketPtr pkt) +{ + DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(), + pkt->cmdString()); + + // must be a snoop + if (pkt->isInvalidate()) { + DPRINTF(LSQ, "received invalidation for addr:%#x\n", + pkt->getAddr()); + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].checkSnoop(pkt); + } + } +} + template int LSQ::getCount() { unsigned total = 0; - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; total += getCount(tid); } @@ -394,11 +389,11 @@ LSQ::numLoads() { unsigned total = 0; - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; total += numLoads(tid); } @@ -412,11 +407,11 @@ LSQ::numStores() { unsigned total = 0; - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; total += thread[tid].numStores(); } @@ -425,18 +420,18 @@ LSQ::numStores() } template -int -LSQ::numLoadsReady() +unsigned +LSQ::numFreeLoadEntries() { unsigned total = 0; - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; - total += thread[tid].numLoadsReady(); + total += thread[tid].numFreeLoadEntries(); } return total; @@ -444,17 +439,17 @@ LSQ::numLoadsReady() template unsigned -LSQ::numFreeEntries() +LSQ::numFreeStoreEntries() { unsigned total = 0; - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; - total += thread[tid].numFreeEntries(); + total += thread[tid].numFreeStoreEntries(); } return total; @@ -462,23 +457,27 @@ LSQ::numFreeEntries() template unsigned -LSQ::numFreeEntries(unsigned tid) +LSQ::numFreeLoadEntries(ThreadID tid) +{ + return thread[tid].numFreeLoadEntries(); +} + +template +unsigned +LSQ::numFreeStoreEntries(ThreadID tid) { - //if( lsqPolicy == Dynamic ) - //return numFreeEntries(); - //else - return thread[tid].numFreeEntries(); + return thread[tid].numFreeStoreEntries(); } template bool LSQ::isFull() { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; 
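+    // Note that the snoop check below is broadcast to every thread's LSQ
+    // unit: speculative loads in any thread, not just the requester's,
+    // may be tracking the invalidated line.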
if (!(thread[tid].lqFull() || thread[tid].sqFull())) return false; @@ -489,25 +488,66 @@ LSQ::isFull() template bool -LSQ::isFull(unsigned tid) +LSQ::isFull(ThreadID tid) { //@todo: Change to Calculate All Entries for //Dynamic Policy - if (lsqPolicy == Dynamic) + if (lsqPolicy == SMTQueuePolicy::Dynamic) return isFull(); else return thread[tid].lqFull() || thread[tid].sqFull(); } +template +bool +LSQ::isEmpty() const +{ + return lqEmpty() && sqEmpty(); +} + +template +bool +LSQ::lqEmpty() const +{ + list::const_iterator threads = activeThreads->begin(); + list::const_iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!thread[tid].lqEmpty()) + return false; + } + + return true; +} + +template +bool +LSQ::sqEmpty() const +{ + list::const_iterator threads = activeThreads->begin(); + list::const_iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!thread[tid].sqEmpty()) + return false; + } + + return true; +} + template bool LSQ::lqFull() { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; if (!thread[tid].lqFull()) return false; @@ -518,11 +558,11 @@ LSQ::lqFull() template bool -LSQ::lqFull(unsigned tid) +LSQ::lqFull(ThreadID tid) { //@todo: Change to Calculate All Entries for //Dynamic Policy - if( lsqPolicy == Dynamic ) + if (lsqPolicy == SMTQueuePolicy::Dynamic) return lqFull(); else return thread[tid].lqFull(); @@ -532,11 +572,11 @@ template bool LSQ::sqFull() { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; if (!sqFull(tid)) return false; @@ -547,11 +587,11 @@ LSQ::sqFull() template bool -LSQ::sqFull(unsigned tid) +LSQ::sqFull(ThreadID tid) { //@todo: Change to Calculate All Entries for //Dynamic Policy - if( lsqPolicy == Dynamic ) + if (lsqPolicy == SMTQueuePolicy::Dynamic) return sqFull(); else return thread[tid].sqFull(); @@ -561,11 +601,11 @@ template bool LSQ::isStalled() { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; if (!thread[tid].isStalled()) return false; @@ -576,9 +616,9 @@ LSQ::isStalled() template bool -LSQ::isStalled(unsigned tid) +LSQ::isStalled(ThreadID tid) { - if( lsqPolicy == Dynamic ) + if (lsqPolicy == SMTQueuePolicy::Dynamic) return isStalled(); else return thread[tid].isStalled(); @@ -588,49 +628,669 @@ template bool LSQ::hasStoresToWB() { - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - - if (threads == end) - return false; + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); while (threads != end) { - unsigned tid = *threads++; + ThreadID tid = *threads++; - if (!hasStoresToWB(tid)) - return false; + if (hasStoresToWB(tid)) + return true; } - return true; + return false; } template bool LSQ::willWB() { - std::list::iterator threads = activeThreads->begin(); - 
-    std::list<unsigned>::iterator end = activeThreads->end();
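+    // True if any active thread still has stores it intends to write back.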
+ inst->setExecuted(); + } + } + + if (inst->traceData) + inst->traceData->setMem(addr, size, flags); + + return inst->getFault(); +} + +template +void +LSQ::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req, + ThreadContext* tc, BaseTLB::Mode mode) +{ + _fault.push_back(fault); + numInTranslationFragments = 0; + numTranslatedFragments = 1; + /* If the instruction has been squahsed, let the request know + * as it may have to self-destruct. */ + if (_inst->isSquashed()) { + this->squashTranslation(); + } else { + _inst->strictlyOrdered(req->isStrictlyOrdered()); + + flags.set(Flag::TranslationFinished); + if (fault == NoFault) { + _inst->physEffAddr = req->getPaddr(); + _inst->memReqFlags = req->getFlags(); + if (req->isCondSwap()) { + assert(_res); + req->setExtraData(*_res); + } + setState(State::Request); + } else { + setState(State::Fault); + } + + LSQRequest::_inst->fault = fault; + LSQRequest::_inst->translationCompleted(true); + } +} + +template +void +LSQ::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req, + ThreadContext* tc, BaseTLB::Mode mode) +{ + int i; + for (i = 0; i < _requests.size() && _requests[i] != req; i++); + assert(i < _requests.size()); + _fault[i] = fault; + + numInTranslationFragments--; + numTranslatedFragments++; + + if (fault == NoFault) + mainReq->setFlags(req->getFlags()); + + if (numTranslatedFragments == _requests.size()) { + if (_inst->isSquashed()) { + this->squashTranslation(); + } else { + _inst->strictlyOrdered(mainReq->isStrictlyOrdered()); + flags.set(Flag::TranslationFinished); + _inst->translationCompleted(true); + + for (i = 0; i < _fault.size() && _fault[i] == NoFault; i++); + if (i > 0) { + _inst->physEffAddr = request(0)->getPaddr(); + _inst->memReqFlags = mainReq->getFlags(); + if (mainReq->isCondSwap()) { + assert (i == _fault.size()); + assert(_res); + mainReq->setExtraData(*_res); + } + if (i == _fault.size()) { + _inst->fault = NoFault; + setState(State::Request); + } else { + _inst->fault = _fault[i]; + setState(State::PartialFault); + } + } else { + _inst->fault = _fault[0]; + setState(State::Fault); + } + } + + } +} + +template +void +LSQ::SingleDataRequest::initiateTranslation() +{ + assert(_requests.size() == 0); + + this->addRequest(_addr, _size, _byteEnable); + + if (_requests.size() > 0) { + _requests.back()->setReqInstSeqNum(_inst->seqNum); + _requests.back()->taskId(_taskId); + _inst->translationStarted(true); + setState(State::Translation); + flags.set(Flag::TranslationStarted); + + _inst->savedReq = this; + sendFragmentToTranslation(0); + } else { + _inst->setMemAccPredicate(false); + } +} + +template +PacketPtr +LSQ::SplitDataRequest::mainPacket() +{ + return _mainPacket; +} + +template +RequestPtr +LSQ::SplitDataRequest::mainRequest() +{ + return mainReq; +} + +template +void +LSQ::SplitDataRequest::initiateTranslation() +{ + auto cacheLineSize = _port.cacheLineSize(); + Addr base_addr = _addr; + Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize); + Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize); + uint32_t size_so_far = 0; + + mainReq = std::make_shared(base_addr, + _size, _flags, _inst->requestorId(), + _inst->instAddr(), _inst->contextId()); + if (!_byteEnable.empty()) { + mainReq->setByteEnable(_byteEnable); + } + + // Paddr is not used in mainReq. However, we will accumulate the flags + // from the sub requests into mainReq by calling setFlags() in finish(). 
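+            // A load whose memory-access predicate resolved to false
+            // performs no access, but must still look complete so that
+            // commit can retire it.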
+ // setFlags() assumes that paddr is set so flip the paddr valid bit here to + // avoid a potential assert in setFlags() when we call it from finish(). + mainReq->setPaddr(0); + + /* Get the pre-fix, possibly unaligned. */ + if (_byteEnable.empty()) { + this->addRequest(base_addr, next_addr - base_addr, _byteEnable); + } else { + auto it_start = _byteEnable.begin(); + auto it_end = _byteEnable.begin() + (next_addr - base_addr); + this->addRequest(base_addr, next_addr - base_addr, + std::vector(it_start, it_end)); + } + size_so_far = next_addr - base_addr; + + /* We are block aligned now, reading whole blocks. */ + base_addr = next_addr; + while (base_addr != final_addr) { + if (_byteEnable.empty()) { + this->addRequest(base_addr, cacheLineSize, _byteEnable); + } else { + auto it_start = _byteEnable.begin() + size_so_far; + auto it_end = _byteEnable.begin() + size_so_far + cacheLineSize; + this->addRequest(base_addr, cacheLineSize, + std::vector(it_start, it_end)); + } + size_so_far += cacheLineSize; + base_addr += cacheLineSize; + } + + /* Deal with the tail. */ + if (size_so_far < _size) { + if (_byteEnable.empty()) { + this->addRequest(base_addr, _size - size_so_far, _byteEnable); + } else { + auto it_start = _byteEnable.begin() + size_so_far; + auto it_end = _byteEnable.end(); + this->addRequest(base_addr, _size - size_so_far, + std::vector(it_start, it_end)); + } + } + + if (_requests.size() > 0) { + /* Setup the requests and send them to translation. */ + for (auto& r: _requests) { + r->setReqInstSeqNum(_inst->seqNum); + r->taskId(_taskId); + } + + _inst->translationStarted(true); + setState(State::Translation); + flags.set(Flag::TranslationStarted); + this->_inst->savedReq = this; + numInTranslationFragments = 0; + numTranslatedFragments = 0; + _fault.resize(_requests.size()); + + for (uint32_t i = 0; i < _requests.size(); i++) { + sendFragmentToTranslation(i); + } + } else { + _inst->setMemAccPredicate(false); + } +} + +template +void +LSQ::LSQRequest::sendFragmentToTranslation(int i) +{ + numInTranslationFragments++; + _port.dTLB()->translateTiming( + this->request(i), + this->_inst->thread->getTC(), this, + this->isLoad() ? BaseTLB::Read : BaseTLB::Write); +} + +template +bool +LSQ::SingleDataRequest::recvTimingResp(PacketPtr pkt) +{ + assert(_numOutstandingPackets == 1); + auto state = dynamic_cast(pkt->senderState); + flags.set(Flag::Complete); + state->outstanding--; + assert(pkt == _packets.front()); + _port.completeDataAccess(pkt); + return true; +} + +template +bool +LSQ::SplitDataRequest::recvTimingResp(PacketPtr pkt) +{ + auto state = dynamic_cast(pkt->senderState); + uint32_t pktIdx = 0; + while (pktIdx < _packets.size() && pkt != _packets[pktIdx]) + pktIdx++; + assert(pktIdx < _packets.size()); + numReceivedPackets++; + state->outstanding--; + if (numReceivedPackets == _packets.size()) { + flags.set(Flag::Complete); + /* Assemble packets. */ + PacketPtr resp = isLoad() + ? Packet::createRead(mainReq) + : Packet::createWrite(mainReq); + if (isLoad()) + resp->dataStatic(_inst->memData); + else + resp->dataStatic(_data); + resp->senderState = _senderState; + _port.completeDataAccess(resp); + delete resp; + } + return true; +} + +template +void +LSQ::SingleDataRequest::buildPackets() +{ + assert(_senderState); + /* Retries do not create new packets. */ + if (_packets.size() == 0) { + _packets.push_back( + isLoad() + ? 
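+    // (Request::setFlags() accumulates flags, so e.g. a single uncacheable
+    // fragment marks the whole main request uncacheable.)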
+                   Packet::createRead(request())
+                MemCmd::ReadReq : MemCmd::WriteReq);
+ */ + if (r->hasPaddr() && (r->getPaddr() & blockMask) == blockAddr) { + is_hit = true; + break; + } + } + return is_hit; +} + +template +bool +LSQ::DcachePort::recvTimingResp(PacketPtr pkt) +{ + return lsq->recvTimingResp(pkt); +} + +template +void +LSQ::DcachePort::recvTimingSnoopReq(PacketPtr pkt) +{ + for (ThreadID tid = 0; tid < cpu->numThreads; tid++) { + if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) { + cpu->wakeup(tid); + } + } + lsq->recvTimingSnoopReq(pkt); +} + +template +void +LSQ::DcachePort::recvReqRetry() +{ + lsq->recvReqRetry(); +} + +template +LSQ::HtmCmdRequest::HtmCmdRequest(LSQUnit* port, + const DynInstPtr& inst, + const Request::Flags& flags_) : + SingleDataRequest(port, inst, true, 0x0lu, 8, flags_, + nullptr, nullptr, nullptr) +{ + assert(_requests.size() == 0); + + this->addRequest(_addr, _size, _byteEnable); + + if (_requests.size() > 0) { + _requests.back()->setReqInstSeqNum(_inst->seqNum); + _requests.back()->taskId(_taskId); + _requests.back()->setPaddr(_addr); + _requests.back()->setInstCount(_inst->getCpuPtr()->totalInsts()); + + _inst->strictlyOrdered(_requests.back()->isStrictlyOrdered()); + _inst->fault = NoFault; + _inst->physEffAddr = _requests.back()->getPaddr(); + _inst->memReqFlags = _requests.back()->getFlags(); + _inst->savedReq = this; + + setState(State::Translation); + } else { + panic("unexpected behaviour"); + } +} + +template +void +LSQ::HtmCmdRequest::initiateTranslation() +{ + // Transaction commands are implemented as loads to avoid significant + // changes to the cpu and memory interfaces + // The virtual and physical address uses a dummy value of 0x00 + // Address translation does not really occur thus the code below + + flags.set(Flag::TranslationStarted); + flags.set(Flag::TranslationFinished); + + _inst->translationStarted(true); + _inst->translationCompleted(true); + + setState(State::Request); +} + +template +void +LSQ::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req, + ThreadContext* tc, BaseTLB::Mode mode) +{ + panic("unexpected behaviour"); +} + +#endif//__CPU_O3_LSQ_IMPL_HH__