X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmem%2Fcache%2Fbase.cc;h=fcf03741c5eea503cc3648015a0b922d1f9b819f;hb=e0de180ee4eb3316d2dd0d20320f2a1b310101bc;hp=ba981b606b6a82da858d613843cafe9742d540ff;hpb=0d68d36b9d12c36e6201fa8bc4bec34258c04eab;p=gem5.git diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index ba981b606..fcf03741c 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013, 2018-2019 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Erik Hallnor + * Nikos Nikoleris */ /** @@ -45,343 +46,2141 @@ * Definition of BaseCache functions. */ -#include "cpu/base.hh" -#include "cpu/smt.hh" -#include "debug/Cache.hh" -#include "debug/Drain.hh" -#include "mem/cache/tags/fa_lru.hh" -#include "mem/cache/tags/lru.hh" #include "mem/cache/base.hh" -#include "mem/cache/cache.hh" + +#include "base/compiler.hh" +#include "base/logging.hh" +#include "debug/Cache.hh" +#include "debug/CacheComp.hh" +#include "debug/CachePort.hh" +#include "debug/CacheRepl.hh" +#include "debug/CacheVerbose.hh" +#include "mem/cache/compressors/base.hh" #include "mem/cache/mshr.hh" -#include "sim/full_system.hh" +#include "mem/cache/prefetch/base.hh" +#include "mem/cache/queue_entry.hh" +#include "mem/cache/tags/super_blk.hh" +#include "params/BaseCache.hh" +#include "params/WriteAllocator.hh" +#include "sim/core.hh" using namespace std; BaseCache::CacheSlavePort::CacheSlavePort(const std::string &_name, BaseCache *_cache, const std::string &_label) - : QueuedSlavePort(_name, _cache, queue), queue(*_cache, *this, _label), - blocked(false), mustSendRetry(false), sendRetryEvent(this) + : QueuedSlavePort(_name, _cache, queue), + queue(*_cache, *this, true, _label), + blocked(false), mustSendRetry(false), + sendRetryEvent([this]{ processSendRetry(); }, _name) +{ +} + +BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size) + : ClockedObject(p), + cpuSidePort (p->name + ".cpu_side", this, "CpuSidePort"), + memSidePort(p->name + ".mem_side", this, "MemSidePort"), + mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below + writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below + tags(p->tags), + compressor(p->compressor), + prefetcher(p->prefetcher), + writeAllocator(p->write_allocator), + writebackClean(p->writeback_clean), + tempBlockWriteback(nullptr), + writebackTempBlockAtomicEvent([this]{ writebackTempBlockAtomic(); }, + name(), false, + EventBase::Delayed_Writeback_Pri), + blkSize(blk_size), + lookupLatency(p->tag_latency), + dataLatency(p->data_latency), + forwardLatency(p->tag_latency), + fillLatency(p->data_latency), + responseLatency(p->response_latency), + sequentialAccess(p->sequential_access), + numTarget(p->tgts_per_mshr), + forwardSnoops(true), + clusivity(p->clusivity), + isReadOnly(p->is_read_only), + blocked(0), + order(0), + noTargetMSHR(nullptr), + missCount(p->max_miss_count), + addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()), + system(p->system), + stats(*this) +{ + // the MSHR queue has no reserve entries as we check the MSHR + // queue on every single allocation, whereas the write queue has + // as many reserve entries as we have MSHRs, since every MSHR may + // eventually require a writeback, and we do not check the write + // buffer before committing to an MSHR + + // forward 
snoops is overridden in init() once we can query + // whether the connected master is actually snooping or not + + tempBlock = new TempCacheBlk(blkSize); + + tags->tagsInit(); + if (prefetcher) + prefetcher->setCache(this); +} + +BaseCache::~BaseCache() +{ + delete tempBlock; +} + +void +BaseCache::CacheSlavePort::setBlocked() +{ + assert(!blocked); + DPRINTF(CachePort, "Port is blocking new requests\n"); + blocked = true; + // if we already scheduled a retry in this cycle, but it has not yet + // happened, cancel it + if (sendRetryEvent.scheduled()) { + owner.deschedule(sendRetryEvent); + DPRINTF(CachePort, "Port descheduled retry\n"); + mustSendRetry = true; + } +} + +void +BaseCache::CacheSlavePort::clearBlocked() +{ + assert(blocked); + DPRINTF(CachePort, "Port is accepting new requests\n"); + blocked = false; + if (mustSendRetry) { + // @TODO: need to find a better time (next cycle?) + owner.schedule(sendRetryEvent, curTick() + 1); + } +} + +void +BaseCache::CacheSlavePort::processSendRetry() +{ + DPRINTF(CachePort, "Port is sending retry\n"); + + // reset the flag and call retry + mustSendRetry = false; + sendRetryReq(); +} + +Addr +BaseCache::regenerateBlkAddr(CacheBlk* blk) +{ + if (blk != tempBlock) { + return tags->regenerateBlkAddr(blk); + } else { + return tempBlock->getAddr(); + } +} + +void +BaseCache::init() +{ + if (!cpuSidePort.isConnected() || !memSidePort.isConnected()) + fatal("Cache ports on %s are not connected\n", name()); + cpuSidePort.sendRangeChange(); + forwardSnoops = cpuSidePort.isSnooping(); +} + +Port & +BaseCache::getPort(const std::string &if_name, PortID idx) +{ + if (if_name == "mem_side") { + return memSidePort; + } else if (if_name == "cpu_side") { + return cpuSidePort; + } else { + return ClockedObject::getPort(if_name, idx); + } +} + +bool +BaseCache::inRange(Addr addr) const +{ + for (const auto& r : addrRanges) { + if (r.contains(addr)) { + return true; + } + } + return false; +} + +void +BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time) +{ + if (pkt->needsResponse()) { + // These delays should have been consumed by now + assert(pkt->headerDelay == 0); + assert(pkt->payloadDelay == 0); + + pkt->makeTimingResponse(); + + // In this case we are considering request_time that takes + // into account the delay of the xbar, if any, and just + // lat, neglecting responseLatency, modelling hit latency + // just as the value of lat overriden by access(), which calls + // the calculateAccessLatency() function. + cpuSidePort.schedTimingResp(pkt, request_time); + } else { + DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__, + pkt->print()); + + // queue the packet for deletion, as the sending cache is + // still relying on it; if the block is found in access(), + // CleanEvict and Writeback messages will be deleted + // here as well + pendingDelete.reset(pkt); + } +} + +void +BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk, + Tick forward_time, Tick request_time) +{ + if (writeAllocator && + pkt && pkt->isWrite() && !pkt->req->isUncacheable()) { + writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(), + pkt->getBlockAddr(blkSize)); + } + + if (mshr) { + /// MSHR hit + /// @note writebacks will be checked in getNextMSHR() + /// for any conflicting requests to the same block + + //@todo remove hw_pf here + + // Coalesce unless it was a software prefetch (see above). 
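+        // In rough outline, the cases below are: a CleanEvict that
+        // hits an outstanding MSHR is simply dropped, a WriteClean is
+        // diverted to the write buffer, and anything else is coalesced
+        // as an additional target on the existing MSHR; once the
+        // target count reaches numTarget the cache blocks new requests
+        // until this MSHR is serviced.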
+ if (pkt) { + assert(!pkt->isWriteback()); + // CleanEvicts corresponding to blocks which have + // outstanding requests in MSHRs are simply sunk here + if (pkt->cmd == MemCmd::CleanEvict) { + pendingDelete.reset(pkt); + } else if (pkt->cmd == MemCmd::WriteClean) { + // A WriteClean should never coalesce with any + // outstanding cache maintenance requests. + + // We use forward_time here because there is an + // uncached memory write, forwarded to WriteBuffer. + allocateWriteBuffer(pkt, forward_time); + } else { + DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__, + pkt->print()); + + assert(pkt->req->masterId() < system->maxMasters()); + stats.cmdStats(pkt).mshr_hits[pkt->req->masterId()]++; + + // We use forward_time here because it is the same + // considering new targets. We have multiple + // requests for the same address here. It + // specifies the latency to allocate an internal + // buffer and to schedule an event to the queued + // port and also takes into account the additional + // delay of the xbar. + mshr->allocateTarget(pkt, forward_time, order++, + allocOnFill(pkt->cmd)); + if (mshr->getNumTargets() == numTarget) { + noTargetMSHR = mshr; + setBlocked(Blocked_NoTargets); + // need to be careful with this... if this mshr isn't + // ready yet (i.e. time > curTick()), we don't want to + // move it ahead of mshrs that are ready + // mshrQueue.moveToFront(mshr); + } + } + } + } else { + // no MSHR + assert(pkt->req->masterId() < system->maxMasters()); + stats.cmdStats(pkt).mshr_misses[pkt->req->masterId()]++; + + if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) { + // We use forward_time here because there is an + // writeback or writeclean, forwarded to WriteBuffer. + allocateWriteBuffer(pkt, forward_time); + } else { + if (blk && blk->isValid()) { + // If we have a write miss to a valid block, we + // need to mark the block non-readable. Otherwise + // if we allow reads while there's an outstanding + // write miss, the read could return stale data + // out of the cache block... a more aggressive + // system could detect the overlap (if any) and + // forward data out of the MSHRs, but we don't do + // that yet. Note that we do need to leave the + // block valid so that it stays in the cache, in + // case we get an upgrade response (and hence no + // new data) when the write miss completes. + // As long as CPUs do proper store/load forwarding + // internally, and have a sufficiently weak memory + // model, this is probably unnecessary, but at some + // point it must have seemed like we needed it... + assert((pkt->needsWritable() && !blk->isWritable()) || + pkt->req->isCacheMaintenance()); + blk->status &= ~BlkReadable; + } + // Here we are using forward_time, modelling the latency of + // a miss (outbound) just as forwardLatency, neglecting the + // lookupLatency component. + allocateMissBuffer(pkt, forward_time); + } + } +} + +void +BaseCache::recvTimingReq(PacketPtr pkt) +{ + // anything that is merely forwarded pays for the forward latency and + // the delay provided by the crossbar + Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; + + Cycles lat; + CacheBlk *blk = nullptr; + bool satisfied = false; + { + PacketList writebacks; + // Note that lat is passed by reference here. The function + // access() will set the lat value. 
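+        // For reference: on a hit the response is scheduled at
+        // request_time = clockEdge(lat), computed below from the lat
+        // that access() returns, while on a miss the packet is
+        // forwarded or buffered at forward_time, computed above from
+        // forwardLatency plus the crossbar's headerDelay. E.g. with
+        // lat = 4 cycles, a hit response leaves four clock edges from
+        // now regardless of forwardLatency.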
+ satisfied = access(pkt, blk, lat, writebacks); + + // After the evicted blocks are selected, they must be forwarded + // to the write buffer to ensure they logically precede anything + // happening below + doWritebacks(writebacks, clockEdge(lat + forwardLatency)); + } + + // Here we charge the headerDelay that takes into account the latencies + // of the bus, if the packet comes from it. + // The latency charged is just the value set by the access() function. + // In case of a hit we are neglecting response latency. + // In case of a miss we are neglecting forward latency. + Tick request_time = clockEdge(lat); + // Here we reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; + + if (satisfied) { + // notify before anything else as later handleTimingReqHit might turn + // the packet in a response + ppHit->notify(pkt); + + if (prefetcher && blk && blk->wasPrefetched()) { + blk->status &= ~BlkHWPrefetched; + } + + handleTimingReqHit(pkt, blk, request_time); + } else { + handleTimingReqMiss(pkt, blk, forward_time, request_time); + + ppMiss->notify(pkt); + } + + if (prefetcher) { + // track time of availability of next prefetch, if any + Tick next_pf_time = prefetcher->nextPrefetchReadyTime(); + if (next_pf_time != MaxTick) { + schedMemSideSendEvent(next_pf_time); + } + } +} + +void +BaseCache::handleUncacheableWriteResp(PacketPtr pkt) +{ + Tick completion_time = clockEdge(responseLatency) + + pkt->headerDelay + pkt->payloadDelay; + + // Reset the bus additional time as it is now accounted for + pkt->headerDelay = pkt->payloadDelay = 0; + + cpuSidePort.schedTimingResp(pkt, completion_time); +} + +void +BaseCache::recvTimingResp(PacketPtr pkt) +{ + assert(pkt->isResponse()); + + // all header delay should be paid for by the crossbar, unless + // this is a prefetch response from above + panic_if(pkt->headerDelay != 0 && pkt->cmd != MemCmd::HardPFResp, + "%s saw a non-zero packet delay\n", name()); + + const bool is_error = pkt->isError(); + + if (is_error) { + DPRINTF(Cache, "%s: Cache received %s with error\n", __func__, + pkt->print()); + } + + DPRINTF(Cache, "%s: Handling response %s\n", __func__, + pkt->print()); + + // if this is a write, we should be looking at an uncacheable + // write + if (pkt->isWrite()) { + assert(pkt->req->isUncacheable()); + handleUncacheableWriteResp(pkt); + return; + } + + // we have dealt with any (uncacheable) writes above, from here on + // we know we are dealing with an MSHR due to a miss or a prefetch + MSHR *mshr = dynamic_cast(pkt->popSenderState()); + assert(mshr); + + if (mshr == noTargetMSHR) { + // we always clear at least one target + clearBlocked(Blocked_NoTargets); + noTargetMSHR = nullptr; + } + + // Initial target is used just for stats + const QueueEntry::Target *initial_tgt = mshr->getTarget(); + const Tick miss_latency = curTick() - initial_tgt->recvTime; + if (pkt->req->isUncacheable()) { + assert(pkt->req->masterId() < system->maxMasters()); + stats.cmdStats(initial_tgt->pkt) + .mshr_uncacheable_lat[pkt->req->masterId()] += miss_latency; + } else { + assert(pkt->req->masterId() < system->maxMasters()); + stats.cmdStats(initial_tgt->pkt) + .mshr_miss_latency[pkt->req->masterId()] += miss_latency; + } + + PacketList writebacks; + + bool is_fill = !mshr->isForward && + (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp || + mshr->wasWholeLineWrite); + + // make sure that if the mshr was due to a whole line write then + // the response is an invalidation + assert(!mshr->wasWholeLineWrite || pkt->isInvalidate()); + + CacheBlk 
*blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); + + if (is_fill && !is_error) { + DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n", + pkt->getAddr()); + + const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ? + writeAllocator->allocate() : mshr->allocOnFill(); + blk = handleFill(pkt, blk, writebacks, allocate); + assert(blk != nullptr); + ppFill->notify(pkt); + } + + if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) { + // The block was marked not readable while there was a pending + // cache maintenance operation, restore its flag. + blk->status |= BlkReadable; + + // This was a cache clean operation (without invalidate) + // and we have a copy of the block already. Since there + // is no invalidation, we can promote targets that don't + // require a writable copy + mshr->promoteReadable(); + } + + if (blk && blk->isWritable() && !pkt->req->isCacheInvalidate()) { + // If at this point the referenced block is writable and the + // response is not a cache invalidate, we promote targets that + // were deferred as we couldn't guarrantee a writable copy + mshr->promoteWritable(); + } + + serviceMSHRTargets(mshr, pkt, blk); + + if (mshr->promoteDeferredTargets()) { + // avoid later read getting stale data while write miss is + // outstanding.. see comment in timingAccess() + if (blk) { + blk->status &= ~BlkReadable; + } + mshrQueue.markPending(mshr); + schedMemSideSendEvent(clockEdge() + pkt->payloadDelay); + } else { + // while we deallocate an mshr from the queue we still have to + // check the isFull condition before and after as we might + // have been using the reserved entries already + const bool was_full = mshrQueue.isFull(); + mshrQueue.deallocate(mshr); + if (was_full && !mshrQueue.isFull()) { + clearBlocked(Blocked_NoMSHRs); + } + + // Request the bus for a prefetch if this deallocation freed enough + // MSHRs for a prefetch to take place + if (prefetcher && mshrQueue.canPrefetch()) { + Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(), + clockEdge()); + if (next_pf_time != MaxTick) + schedMemSideSendEvent(next_pf_time); + } + } + + // if we used temp block, check to see if its valid and then clear it out + if (blk == tempBlock && tempBlock->isValid()) { + evictBlock(blk, writebacks); + } + + const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; + // copy writebacks to write buffer + doWritebacks(writebacks, forward_time); + + DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print()); + delete pkt; +} + + +Tick +BaseCache::recvAtomic(PacketPtr pkt) +{ + // should assert here that there are no outstanding MSHRs or + // writebacks... that would mean that someone used an atomic + // access in timing mode + + // We use lookupLatency here because it is used to specify the latency + // to access. + Cycles lat = lookupLatency; + + CacheBlk *blk = nullptr; + PacketList writebacks; + bool satisfied = access(pkt, blk, lat, writebacks); + + if (pkt->isClean() && blk && blk->isDirty()) { + // A cache clean opearation is looking for a dirty + // block. If a dirty block is encountered a WriteClean + // will update any copies to the path to the memory + // until the point of reference. 
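+        // E.g. an atomic cache clean (without invalidate) that finds a
+        // dirty copy here emits the WriteClean below and is flagged
+        // with setSatisfied(), so points further down can tell that a
+        // follow-up write for this line is on its way.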
+ DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n", + __func__, pkt->print(), blk->print()); + PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id); + writebacks.push_back(wb_pkt); + pkt->setSatisfied(); + } + + // handle writebacks resulting from the access here to ensure they + // logically precede anything happening below + doWritebacksAtomic(writebacks); + assert(writebacks.empty()); + + if (!satisfied) { + lat += handleAtomicReqMiss(pkt, blk, writebacks); + } + + // Note that we don't invoke the prefetcher at all in atomic mode. + // It's not clear how to do it properly, particularly for + // prefetchers that aggressively generate prefetch candidates and + // rely on bandwidth contention to throttle them; these will tend + // to pollute the cache in atomic mode since there is no bandwidth + // contention. If we ever do want to enable prefetching in atomic + // mode, though, this is the place to do it... see timingAccess() + // for an example (though we'd want to issue the prefetch(es) + // immediately rather than calling requestMemSideBus() as we do + // there). + + // do any writebacks resulting from the response handling + doWritebacksAtomic(writebacks); + + // if we used temp block, check to see if its valid and if so + // clear it out, but only do so after the call to recvAtomic is + // finished so that any downstream observers (such as a snoop + // filter), first see the fill, and only then see the eviction + if (blk == tempBlock && tempBlock->isValid()) { + // the atomic CPU calls recvAtomic for fetch and load/store + // sequentuially, and we may already have a tempBlock + // writeback from the fetch that we have not yet sent + if (tempBlockWriteback) { + // if that is the case, write the prevoius one back, and + // do not schedule any new event + writebackTempBlockAtomic(); + } else { + // the writeback/clean eviction happens after the call to + // recvAtomic has finished (but before any successive + // calls), so that the response handling from the fill is + // allowed to happen first + schedule(writebackTempBlockAtomicEvent, curTick()); + } + + tempBlockWriteback = evictBlock(blk); + } + + if (pkt->needsResponse()) { + pkt->makeAtomicResponse(); + } + + return lat * clockPeriod(); +} + +void +BaseCache::functionalAccess(PacketPtr pkt, bool from_cpu_side) +{ + Addr blk_addr = pkt->getBlockAddr(blkSize); + bool is_secure = pkt->isSecure(); + CacheBlk *blk = tags->findBlock(pkt->getAddr(), is_secure); + MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure); + + pkt->pushLabel(name()); + + CacheBlkPrintWrapper cbpw(blk); + + // Note that just because an L2/L3 has valid data doesn't mean an + // L1 doesn't have a more up-to-date modified copy that still + // needs to be found. As a result we always update the request if + // we have it, but only declare it satisfied if we are the owner. 
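+    // For example, a cache that only has the line in a clean state
+    // still copies its data into a functional read below, but leaves
+    // the packet unsatisfied, so the probe continues through the
+    // ports, MSHRs and write buffer and, if still unanswered, is
+    // forwarded to the other side.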
+ + // see if we have data at all (owned or otherwise) + bool have_data = blk && blk->isValid() + && pkt->trySatisfyFunctional(&cbpw, blk_addr, is_secure, blkSize, + blk->data); + + // data we have is dirty if marked as such or if we have an + // in-service MSHR that is pending a modified line + bool have_dirty = + have_data && (blk->isDirty() || + (mshr && mshr->inService && mshr->isPendingModified())); + + bool done = have_dirty || + cpuSidePort.trySatisfyFunctional(pkt) || + mshrQueue.trySatisfyFunctional(pkt) || + writeBuffer.trySatisfyFunctional(pkt) || + memSidePort.trySatisfyFunctional(pkt); + + DPRINTF(CacheVerbose, "%s: %s %s%s%s\n", __func__, pkt->print(), + (blk && blk->isValid()) ? "valid " : "", + have_data ? "data " : "", done ? "done " : ""); + + // We're leaving the cache, so pop cache->name() label + pkt->popLabel(); + + if (done) { + pkt->makeResponse(); + } else { + // if it came as a request from the CPU side then make sure it + // continues towards the memory side + if (from_cpu_side) { + memSidePort.sendFunctional(pkt); + } else if (cpuSidePort.isSnooping()) { + // if it came from the memory side, it must be a snoop request + // and we should only forward it if we are forwarding snoops + cpuSidePort.sendFunctionalSnoop(pkt); + } + } +} + + +void +BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt) +{ + assert(pkt->isRequest()); + + uint64_t overwrite_val; + bool overwrite_mem; + uint64_t condition_val64; + uint32_t condition_val32; + + int offset = pkt->getOffset(blkSize); + uint8_t *blk_data = blk->data + offset; + + assert(sizeof(uint64_t) >= pkt->getSize()); + + overwrite_mem = true; + // keep a copy of our possible write value, and copy what is at the + // memory address into the packet + pkt->writeData((uint8_t *)&overwrite_val); + pkt->setData(blk_data); + + if (pkt->req->isCondSwap()) { + if (pkt->getSize() == sizeof(uint64_t)) { + condition_val64 = pkt->req->getExtraData(); + overwrite_mem = !std::memcmp(&condition_val64, blk_data, + sizeof(uint64_t)); + } else if (pkt->getSize() == sizeof(uint32_t)) { + condition_val32 = (uint32_t)pkt->req->getExtraData(); + overwrite_mem = !std::memcmp(&condition_val32, blk_data, + sizeof(uint32_t)); + } else + panic("Invalid size for conditional read/write\n"); + } + + if (overwrite_mem) { + std::memcpy(blk_data, &overwrite_val, pkt->getSize()); + blk->status |= BlkDirty; + } +} + +QueueEntry* +BaseCache::getNextQueueEntry() { + // Check both MSHR queue and write buffer for potential requests, + // note that null does not mean there is no request, it could + // simply be that it is not ready + MSHR *miss_mshr = mshrQueue.getNext(); + WriteQueueEntry *wq_entry = writeBuffer.getNext(); + + // If we got a write buffer request ready, first priority is a + // full write buffer, otherwise we favour the miss requests + if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) { + // need to search MSHR queue for conflicting earlier miss. + MSHR *conflict_mshr = mshrQueue.findPending(wq_entry); + + if (conflict_mshr && conflict_mshr->order < wq_entry->order) { + // Service misses in order until conflict is cleared. + return conflict_mshr; + + // @todo Note that we ignore the ready time of the conflict here + } + + // No conflicts; issue write + return wq_entry; + } else if (miss_mshr) { + // need to check for conflicting earlier writeback + WriteQueueEntry *conflict_mshr = writeBuffer.findPending(miss_mshr); + if (conflict_mshr) { + // not sure why we don't check order here... it was in the + // original code but commented out. 
+ + // The only way this happens is if we are + // doing a write and we didn't have permissions + // then subsequently saw a writeback (owned got evicted) + // We need to make sure to perform the writeback first + // To preserve the dirty data, then we can issue the write + + // should we return wq_entry here instead? I.e. do we + // have to flush writes in order? I don't think so... not + // for Alpha anyway. Maybe for x86? + return conflict_mshr; + + // @todo Note that we ignore the ready time of the conflict here + } + + // No conflicts; issue read + return miss_mshr; + } + + // fall through... no pending requests. Try a prefetch. + assert(!miss_mshr && !wq_entry); + if (prefetcher && mshrQueue.canPrefetch()) { + // If we have a miss queue slot, we can try a prefetch + PacketPtr pkt = prefetcher->getPacket(); + if (pkt) { + Addr pf_addr = pkt->getBlockAddr(blkSize); + if (!tags->findBlock(pf_addr, pkt->isSecure()) && + !mshrQueue.findMatch(pf_addr, pkt->isSecure()) && + !writeBuffer.findMatch(pf_addr, pkt->isSecure())) { + // Update statistic on number of prefetches issued + // (hwpf_mshr_misses) + assert(pkt->req->masterId() < system->maxMasters()); + stats.cmdStats(pkt).mshr_misses[pkt->req->masterId()]++; + + // allocate an MSHR and return it, note + // that we send the packet straight away, so do not + // schedule the send + return allocateMissBuffer(pkt, curTick(), false); + } else { + // free the request and packet + delete pkt; + } + } + } + + return nullptr; } -BaseCache::BaseCache(const Params *p) - : MemObject(p), - mshrQueue("MSHRs", p->mshrs, 4, MSHRQueue_MSHRs), - writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, - MSHRQueue_WriteBuffer), - blkSize(p->block_size), - hitLatency(p->hit_latency), - responseLatency(p->response_latency), - numTarget(p->tgts_per_mshr), - forwardSnoops(p->forward_snoops), - isTopLevel(p->is_top_level), - blocked(0), - noTargetMSHR(NULL), - missCount(p->max_miss_count), - addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()), - system(p->system) -{ -} +bool +BaseCache::updateCompressionData(CacheBlk *blk, const uint64_t* data, + PacketList &writebacks) +{ + // tempBlock does not exist in the tags, so don't do anything for it. + if (blk == tempBlock) { + return true; + } + + // Get superblock of the given block + CompressionBlk* compression_blk = static_cast(blk); + const SuperBlk* superblock = static_cast( + compression_blk->getSectorBlock()); + + // The compressor is called to compress the updated data, so that its + // metadata can be updated. + std::size_t compression_size = 0; + Cycles compression_lat = Cycles(0); + Cycles decompression_lat = Cycles(0); + compressor->compress(data, compression_lat, decompression_lat, + compression_size); + + // If block's compression factor increased, it may not be co-allocatable + // anymore. If so, some blocks might need to be evicted to make room for + // the bigger block + + // Get previous compressed size + const std::size_t M5_VAR_USED prev_size = compression_blk->getSizeBits(); + + // Check if new data is co-allocatable + const bool is_co_allocatable = superblock->isCompressed(compression_blk) && + superblock->canCoAllocate(compression_size); + + // If block was compressed, possibly co-allocated with other blocks, and + // cannot be co-allocated anymore, one or more blocks must be evicted to + // make room for the expanded block. 
As of now we decide to evict the co- + // allocated blocks to make room for the expansion, but other approaches + // that take the replacement data of the superblock into account may + // generate better results + std::vector evict_blks; + const bool was_compressed = compression_blk->isCompressed(); + if (was_compressed && !is_co_allocatable) { + // Get all co-allocated blocks + for (const auto& sub_blk : superblock->blks) { + if (sub_blk->isValid() && (compression_blk != sub_blk)) { + // Check for transient state allocations. If any of the + // entries listed for eviction has a transient state, the + // allocation fails + const Addr repl_addr = regenerateBlkAddr(sub_blk); + const MSHR *repl_mshr = + mshrQueue.findMatch(repl_addr, sub_blk->isSecure()); + if (repl_mshr) { + DPRINTF(CacheRepl, "Aborting data expansion of %s due " \ + "to replacement of block in transient state: %s\n", + compression_blk->print(), sub_blk->print()); + // Too hard to replace block with transient state, so it + // cannot be evicted. Mark the update as failed and expect + // the caller to evict this block. Since this is called + // only when writebacks arrive, and packets do not contain + // compressed data, there is no need to decompress + compression_blk->setSizeBits(blkSize * 8); + compression_blk->setDecompressionLatency(Cycles(0)); + compression_blk->setUncompressed(); + return false; + } + + evict_blks.push_back(sub_blk); + } + } + + // Update the number of data expansions + stats.dataExpansions++; + + DPRINTF(CacheComp, "Data expansion: expanding [%s] from %d to %d bits" + "\n", blk->print(), prev_size, compression_size); + } + + // We always store compressed blocks when possible + if (is_co_allocatable) { + compression_blk->setCompressed(); + } else { + compression_blk->setUncompressed(); + } + compression_blk->setSizeBits(compression_size); + compression_blk->setDecompressionLatency(decompression_lat); + + // Evict valid blocks + for (const auto& evict_blk : evict_blks) { + if (evict_blk->isValid()) { + if (evict_blk->wasPrefetched()) { + stats.unusedPrefetches++; + } + evictBlock(evict_blk, writebacks); + } + } + + return true; +} + +void +BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool) +{ + assert(pkt->isRequest()); + + assert(blk && blk->isValid()); + // Occasionally this is not true... if we are a lower-level cache + // satisfying a string of Read and ReadEx requests from + // upper-level caches, a Read will mark the block as shared but we + // can satisfy a following ReadEx anyway since we can rely on the + // Read requester(s) to have buffered the ReadEx snoop and to + // invalidate their blocks after receiving them. 
+ // assert(!pkt->needsWritable() || blk->isWritable()); + assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); + + // Check RMW operations first since both isRead() and + // isWrite() will be true for them + if (pkt->cmd == MemCmd::SwapReq) { + if (pkt->isAtomicOp()) { + // extract data from cache and save it into the data field in + // the packet as a return value from this atomic op + int offset = tags->extractBlkOffset(pkt->getAddr()); + uint8_t *blk_data = blk->data + offset; + pkt->setData(blk_data); + + // execute AMO operation + (*(pkt->getAtomicOp()))(blk_data); + + // set block status to dirty + blk->status |= BlkDirty; + } else { + cmpAndSwap(blk, pkt); + } + } else if (pkt->isWrite()) { + // we have the block in a writable state and can go ahead, + // note that the line may be also be considered writable in + // downstream caches along the path to memory, but always + // Exclusive, and never Modified + assert(blk->isWritable()); + // Write or WriteLine at the first cache with block in writable state + if (blk->checkWrite(pkt)) { + pkt->writeDataToBlock(blk->data, blkSize); + } + // Always mark the line as dirty (and thus transition to the + // Modified state) even if we are a failed StoreCond so we + // supply data to any snoops that have appended themselves to + // this cache before knowing the store will fail. + blk->status |= BlkDirty; + DPRINTF(CacheVerbose, "%s for %s (write)\n", __func__, pkt->print()); + } else if (pkt->isRead()) { + if (pkt->isLLSC()) { + blk->trackLoadLocked(pkt); + } + + // all read responses have a data payload + assert(pkt->hasRespData()); + pkt->setDataFromBlock(blk->data, blkSize); + } else if (pkt->isUpgrade()) { + // sanity check + assert(!pkt->hasSharers()); + + if (blk->isDirty()) { + // we were in the Owned state, and a cache above us that + // has the line in Shared state needs to be made aware + // that the data it already has is in fact dirty + pkt->setCacheResponding(); + blk->status &= ~BlkDirty; + } + } else if (pkt->isClean()) { + blk->status &= ~BlkDirty; + } else { + assert(pkt->isInvalidate()); + invalidateBlock(blk); + DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__, + pkt->print()); + } +} + +///////////////////////////////////////////////////// +// +// Access path: requests coming in from the CPU side +// +///////////////////////////////////////////////////// +Cycles +BaseCache::calculateTagOnlyLatency(const uint32_t delay, + const Cycles lookup_lat) const +{ + // A tag-only access has to wait for the packet to arrive in order to + // perform the tag lookup. + return ticksToCycles(delay) + lookup_lat; +} + +Cycles +BaseCache::calculateAccessLatency(const CacheBlk* blk, const uint32_t delay, + const Cycles lookup_lat) const +{ + Cycles lat(0); + + if (blk != nullptr) { + // As soon as the access arrives, for sequential accesses first access + // tags, then the data entry. In the case of parallel accesses the + // latency is dictated by the slowest of tag and data latencies. + if (sequentialAccess) { + lat = ticksToCycles(delay) + lookup_lat + dataLatency; + } else { + lat = ticksToCycles(delay) + std::max(lookup_lat, dataLatency); + } + + // Check if the block to be accessed is available. If not, apply the + // access latency on top of when the block is ready to be accessed. 
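+        // A rough worked example: with lookup_lat = 2 cycles and
+        // dataLatency = 3 cycles, a sequential access costs 2 + 3 = 5
+        // cycles and a parallel one max(2, 3) = 3 cycles, plus
+        // whatever the header delay converts to. If the block only
+        // becomes ready 8 cycles after the packet arrives, that wait
+        // exceeds lat and is added on top, e.g. 3 + 8 = 11 cycles.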
+ const Tick tick = curTick() + delay; + const Tick when_ready = blk->getWhenReady(); + if (when_ready > tick && + ticksToCycles(when_ready - tick) > lat) { + lat += ticksToCycles(when_ready - tick); + } + } else { + // In case of a miss, we neglect the data access in a parallel + // configuration (i.e., the data access will be stopped as soon as + // we find out it is a miss), and use the tag-only latency. + lat = calculateTagOnlyLatency(delay, lookup_lat); + } + + return lat; +} + +bool +BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, + PacketList &writebacks) +{ + // sanity check + assert(pkt->isRequest()); + + chatty_assert(!(isReadOnly && pkt->isWrite()), + "Should never see a write in a read-only cache %s\n", + name()); + + // Access block in the tags + Cycles tag_latency(0); + blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency); + + DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(), + blk ? "hit " + blk->print() : "miss"); + + if (pkt->req->isCacheMaintenance()) { + // A cache maintenance operation is always forwarded to the + // memory below even if the block is found in dirty state. + + // We defer any changes to the state of the block until we + // create and mark as in service the mshr for the downstream + // packet. + + // Calculate access latency on top of when the packet arrives. This + // takes into account the bus delay. + lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); + + return false; + } + + if (pkt->isEviction()) { + // We check for presence of block in above caches before issuing + // Writeback or CleanEvict to write buffer. Therefore the only + // possible cases can be of a CleanEvict packet coming from above + // encountering a Writeback generated in this cache peer cache and + // waiting in the write buffer. Cases of upper level peer caches + // generating CleanEvict and Writeback or simply CleanEvict and + // CleanEvict almost simultaneously will be caught by snoops sent out + // by crossbar. + WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(), + pkt->isSecure()); + if (wb_entry) { + assert(wb_entry->getNumTargets() == 1); + PacketPtr wbPkt = wb_entry->getTarget()->pkt; + assert(wbPkt->isWriteback()); + + if (pkt->isCleanEviction()) { + // The CleanEvict and WritebackClean snoops into other + // peer caches of the same level while traversing the + // crossbar. If a copy of the block is found, the + // packet is deleted in the crossbar. Hence, none of + // the other upper level caches connected to this + // cache have the block, so we can clear the + // BLOCK_CACHED flag in the Writeback if set and + // discard the CleanEvict by returning true. + wbPkt->clearBlockCached(); + + // A clean evict does not need to access the data array + lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); + + return true; + } else { + assert(pkt->cmd == MemCmd::WritebackDirty); + // Dirty writeback from above trumps our clean + // writeback... discard here + // Note: markInService will remove entry from writeback buffer. + markInService(wb_entry); + delete wbPkt; + } + } + } + + // The critical latency part of a write depends only on the tag access + if (pkt->isWrite()) { + lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); + } + + // Writeback handling is special case. We can write the block into + // the cache without having a writeable copy (or any copy at all). 
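+    // E.g. a WritebackClean that races with an outstanding MSHR for
+    // the same line is dropped below, while a WritebackDirty that
+    // misses tries to allocate a victim via allocateBlock() and, if
+    // that succeeds, fills the line and marks it dirty without this
+    // cache ever having asked for ownership.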
+ if (pkt->isWriteback()) { + assert(blkSize == pkt->getSize()); + + // we could get a clean writeback while we are having + // outstanding accesses to a block, do the simple thing for + // now and drop the clean writeback so that we do not upset + // any ordering/decisions about ownership already taken + if (pkt->cmd == MemCmd::WritebackClean && + mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) { + DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, " + "dropping\n", pkt->getAddr()); + + // A writeback searches for the block, then writes the data. + // As the writeback is being dropped, the data is not touched, + // and we just had to wait for the time to find a match in the + // MSHR. As of now assume a mshr queue search takes as long as + // a tag lookup for simplicity. + return true; + } + + if (!blk) { + // need to do a replacement + blk = allocateBlock(pkt, writebacks); + if (!blk) { + // no replaceable block available: give up, fwd to next level. + incMissCount(pkt); + return false; + } + + blk->status |= BlkReadable; + } else if (compressor) { + // This is an overwrite to an existing block, therefore we need + // to check for data expansion (i.e., block was compressed with + // a smaller size, and now it doesn't fit the entry anymore). + // If that is the case we might need to evict blocks. + if (!updateCompressionData(blk, pkt->getConstPtr(), + writebacks)) { + invalidateBlock(blk); + return false; + } + } + + // only mark the block dirty if we got a writeback command, + // and leave it as is for a clean writeback + if (pkt->cmd == MemCmd::WritebackDirty) { + // TODO: the coherent cache can assert(!blk->isDirty()); + blk->status |= BlkDirty; + } + // if the packet does not have sharers, it is passing + // writable, and we got the writeback in Modified or Exclusive + // state, if not we are in the Owned or Shared state + if (!pkt->hasSharers()) { + blk->status |= BlkWritable; + } + // nothing else to do; writeback doesn't expect response + assert(!pkt->needsResponse()); + pkt->writeDataToBlock(blk->data, blkSize); + DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print()); + incHitCount(pkt); + + // When the packet metadata arrives, the tag lookup will be done while + // the payload is arriving. Then the block will be ready to access as + // soon as the fill is done + blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay + + std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay)); + + return true; + } else if (pkt->cmd == MemCmd::CleanEvict) { + // A CleanEvict does not need to access the data array + lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); + + if (blk) { + // Found the block in the tags, need to stop CleanEvict from + // propagating further down the hierarchy. Returning true will + // treat the CleanEvict like a satisfied write request and delete + // it. + return true; + } + // We didn't find the block here, propagate the CleanEvict further + // down the memory hierarchy. Returning false will treat the CleanEvict + // like a Writeback which could not find a replaceable block so has to + // go to next level. + return false; + } else if (pkt->cmd == MemCmd::WriteClean) { + // WriteClean handling is a special case. We can allocate a + // block directly if it doesn't exist and we can update the + // block immediately. The WriteClean transfers the ownership + // of the block as well. 
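+        // E.g. a write-through WriteClean that misses is not allocated
+        // here and returns false so it carries on towards memory,
+        // whereas a non-write-through one that misses tries to
+        // allocate a block and, if that succeeds, absorbs the data and
+        // returns true, ending the request at this level.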
+ assert(blkSize == pkt->getSize()); + + if (!blk) { + if (pkt->writeThrough()) { + // if this is a write through packet, we don't try to + // allocate if the block is not present + return false; + } else { + // a writeback that misses needs to allocate a new block + blk = allocateBlock(pkt, writebacks); + if (!blk) { + // no replaceable block available: give up, fwd to + // next level. + incMissCount(pkt); + return false; + } + + blk->status |= BlkReadable; + } + } else if (compressor) { + // This is an overwrite to an existing block, therefore we need + // to check for data expansion (i.e., block was compressed with + // a smaller size, and now it doesn't fit the entry anymore). + // If that is the case we might need to evict blocks. + if (!updateCompressionData(blk, pkt->getConstPtr(), + writebacks)) { + invalidateBlock(blk); + return false; + } + } + + // at this point either this is a writeback or a write-through + // write clean operation and the block is already in this + // cache, we need to update the data and the block flags + assert(blk); + // TODO: the coherent cache can assert(!blk->isDirty()); + if (!pkt->writeThrough()) { + blk->status |= BlkDirty; + } + // nothing else to do; writeback doesn't expect response + assert(!pkt->needsResponse()); + pkt->writeDataToBlock(blk->data, blkSize); + DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print()); + + incHitCount(pkt); + + // When the packet metadata arrives, the tag lookup will be done while + // the payload is arriving. Then the block will be ready to access as + // soon as the fill is done + blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay + + std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay)); + + // If this a write-through packet it will be sent to cache below + return !pkt->writeThrough(); + } else if (blk && (pkt->needsWritable() ? blk->isWritable() : + blk->isReadable())) { + // OK to satisfy access + incHitCount(pkt); + + // Calculate access latency based on the need to access the data array + if (pkt->isRead()) { + lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency); + + // When a block is compressed, it must first be decompressed + // before being read. This adds to the access latency. + if (compressor) { + lat += compressor->getDecompressionLatency(blk); + } + } else { + lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); + } + + satisfyRequest(pkt, blk); + maintainClusivity(pkt->fromCache(), blk); + + return true; + } + + // Can't satisfy access normally... either no block (blk == nullptr) + // or have block but need writable + + incMissCount(pkt); + + lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency); + + if (!blk && pkt->isLLSC() && pkt->isWrite()) { + // complete miss on store conditional... just give up now + pkt->req->setExtraData(0); + return true; + } + + return false; +} + +void +BaseCache::maintainClusivity(bool from_cache, CacheBlk *blk) +{ + if (from_cache && blk && blk->isValid() && !blk->isDirty() && + clusivity == Enums::mostly_excl) { + // if we have responded to a cache, and our block is still + // valid, but not dirty, and this cache is mostly exclusive + // with respect to the cache above, drop the block + invalidateBlock(blk); + } +} + +CacheBlk* +BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks, + bool allocate) +{ + assert(pkt->isResponse()); + Addr addr = pkt->getAddr(); + bool is_secure = pkt->isSecure(); +#if TRACING_ON + CacheBlk::State old_state = blk ? 
blk->status : 0; +#endif + + // When handling a fill, we should have no writes to this line. + assert(addr == pkt->getBlockAddr(blkSize)); + assert(!writeBuffer.findMatch(addr, is_secure)); + + if (!blk) { + // better have read new data... + assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp); + + // need to do a replacement if allocating, otherwise we stick + // with the temporary storage + blk = allocate ? allocateBlock(pkt, writebacks) : nullptr; + + if (!blk) { + // No replaceable block or a mostly exclusive + // cache... just use temporary storage to complete the + // current request and then get rid of it + blk = tempBlock; + tempBlock->insert(addr, is_secure); + DPRINTF(Cache, "using temp block for %#llx (%s)\n", addr, + is_secure ? "s" : "ns"); + } + } else { + // existing block... probably an upgrade + // don't clear block status... if block is already dirty we + // don't want to lose that + } + + // Block is guaranteed to be valid at this point + assert(blk->isValid()); + assert(blk->isSecure() == is_secure); + assert(regenerateBlkAddr(blk) == addr); + + blk->status |= BlkReadable; + + // sanity check for whole-line writes, which should always be + // marked as writable as part of the fill, and then later marked + // dirty as part of satisfyRequest + if (pkt->cmd == MemCmd::InvalidateResp) { + assert(!pkt->hasSharers()); + } + + // here we deal with setting the appropriate state of the line, + // and we start by looking at the hasSharers flag, and ignore the + // cacheResponding flag (normally signalling dirty data) if the + // packet has sharers, thus the line is never allocated as Owned + // (dirty but not writable), and always ends up being either + // Shared, Exclusive or Modified, see Packet::setCacheResponding + // for more details + if (!pkt->hasSharers()) { + // we could get a writable line from memory (rather than a + // cache) even in a read-only cache, note that we set this bit + // even for a read-only cache, possibly revisit this decision + blk->status |= BlkWritable; + + // check if we got this via cache-to-cache transfer (i.e., from a + // cache that had the block in Modified or Owned state) + if (pkt->cacheResponding()) { + // we got the block in Modified state, and invalidated the + // owners copy + blk->status |= BlkDirty; + + chatty_assert(!isReadOnly, "Should never see dirty snoop response " + "in read-only cache %s\n", name()); + + } + } + + DPRINTF(Cache, "Block addr %#llx (%s) moving from state %x to %s\n", + addr, is_secure ? "s" : "ns", old_state, blk->print()); + + // if we got new data, copy it in (checking for a read response + // and a response that has data is the same in the end) + if (pkt->isRead()) { + // sanity checks + assert(pkt->hasData()); + assert(pkt->getSize() == blkSize); + + pkt->writeDataToBlock(blk->data, blkSize); + } + // The block will be ready when the payload arrives and the fill is done + blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay + + pkt->payloadDelay); + + return blk; +} + +CacheBlk* +BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks) +{ + // Get address + const Addr addr = pkt->getAddr(); + + // Get secure bit + const bool is_secure = pkt->isSecure(); + + // Block size and compression related access latency. 
Only relevant if + // using a compressor, otherwise there is no extra delay, and the block + // is fully sized + std::size_t blk_size_bits = blkSize*8; + Cycles compression_lat = Cycles(0); + Cycles decompression_lat = Cycles(0); + + // If a compressor is being used, it is called to compress data before + // insertion. Although in Gem5 the data is stored uncompressed, even if a + // compressor is used, the compression/decompression methods are called to + // calculate the amount of extra cycles needed to read or write compressed + // blocks. + if (compressor) { + compressor->compress(pkt->getConstPtr(), compression_lat, + decompression_lat, blk_size_bits); + } + + // Find replacement victim + std::vector evict_blks; + CacheBlk *victim = tags->findVictim(addr, is_secure, blk_size_bits, + evict_blks); + + // It is valid to return nullptr if there is no victim + if (!victim) + return nullptr; + + // Print victim block's information + DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print()); + + // Check for transient state allocations. If any of the entries listed + // for eviction has a transient state, the allocation fails + bool replacement = false; + for (const auto& blk : evict_blks) { + if (blk->isValid()) { + replacement = true; + + Addr repl_addr = regenerateBlkAddr(blk); + MSHR *repl_mshr = mshrQueue.findMatch(repl_addr, blk->isSecure()); + if (repl_mshr) { + // must be an outstanding upgrade or clean request + // on a block we're about to replace... + assert((!blk->isWritable() && repl_mshr->needsWritable()) || + repl_mshr->isCleaning()); + + // too hard to replace block with transient state + // allocation failed, block not inserted + return nullptr; + } + } + } + + // The victim will be replaced by a new entry, so increase the replacement + // counter if a valid block is being replaced + if (replacement) { + // Evict valid blocks associated to this victim block + for (const auto& blk : evict_blks) { + if (blk->isValid()) { + DPRINTF(CacheRepl, "Evicting %s (%#llx) to make room for " \ + "%#llx (%s)\n", blk->print(), regenerateBlkAddr(blk), + addr, is_secure); + + if (blk->wasPrefetched()) { + stats.unusedPrefetches++; + } + + evictBlock(blk, writebacks); + } + } + + stats.replacements++; + } + + // If using a compressor, set compression data. This must be done before + // block insertion, as compressed tags use this information. 
+ if (compressor) { + compressor->setSizeBits(victim, blk_size_bits); + compressor->setDecompressionLatency(victim, decompression_lat); + } + + // Insert new block at victimized entry + tags->insertBlock(pkt, victim); + + return victim; +} + +void +BaseCache::invalidateBlock(CacheBlk *blk) +{ + // If handling a block present in the Tags, let it do its invalidation + // process, which will update stats and invalidate the block itself + if (blk != tempBlock) { + tags->invalidate(blk); + } else { + tempBlock->invalidate(); + } +} + +void +BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks) +{ + PacketPtr pkt = evictBlock(blk); + if (pkt) { + writebacks.push_back(pkt); + } +} + +PacketPtr +BaseCache::writebackBlk(CacheBlk *blk) +{ + chatty_assert(!isReadOnly || writebackClean, + "Writeback from read-only cache"); + assert(blk && blk->isValid() && (blk->isDirty() || writebackClean)); + + stats.writebacks[Request::wbMasterId]++; + + RequestPtr req = std::make_shared( + regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId); + + if (blk->isSecure()) + req->setFlags(Request::SECURE); + + req->taskId(blk->task_id); + + PacketPtr pkt = + new Packet(req, blk->isDirty() ? + MemCmd::WritebackDirty : MemCmd::WritebackClean); + + DPRINTF(Cache, "Create Writeback %s writable: %d, dirty: %d\n", + pkt->print(), blk->isWritable(), blk->isDirty()); + + if (blk->isWritable()) { + // not asserting shared means we pass the block in modified + // state, mark our own block non-writeable + blk->status &= ~BlkWritable; + } else { + // we are in the Owned state, tell the receiver + pkt->setHasSharers(); + } + + // make sure the block is not marked dirty + blk->status &= ~BlkDirty; + + pkt->allocate(); + pkt->setDataFromBlock(blk->data, blkSize); + + // When a block is compressed, it must first be decompressed before being + // sent for writeback. + if (compressor) { + pkt->payloadDelay = compressor->getDecompressionLatency(blk); + } + + return pkt; +} + +PacketPtr +BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id) +{ + RequestPtr req = std::make_shared( + regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId); + + if (blk->isSecure()) { + req->setFlags(Request::SECURE); + } + req->taskId(blk->task_id); + + PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id); + + if (dest) { + req->setFlags(dest); + pkt->setWriteThrough(); + } + + DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(), + blk->isWritable(), blk->isDirty()); + + if (blk->isWritable()) { + // not asserting shared means we pass the block in modified + // state, mark our own block non-writeable + blk->status &= ~BlkWritable; + } else { + // we are in the Owned state, tell the receiver + pkt->setHasSharers(); + } + + // make sure the block is not marked dirty + blk->status &= ~BlkDirty; + + pkt->allocate(); + pkt->setDataFromBlock(blk->data, blkSize); + + // When a block is compressed, it must first be decompressed before being + // sent for writeback. 
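+    // As in writebackBlk() above, any decompression cost is charged as
+    // extra payloadDelay on the outgoing packet.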
+ if (compressor) { + pkt->payloadDelay = compressor->getDecompressionLatency(blk); + } + + return pkt; +} + + +void +BaseCache::memWriteback() +{ + tags->forEachBlk([this](CacheBlk &blk) { writebackVisitor(blk); }); +} + +void +BaseCache::memInvalidate() +{ + tags->forEachBlk([this](CacheBlk &blk) { invalidateVisitor(blk); }); +} + +bool +BaseCache::isDirty() const +{ + return tags->anyBlk([](CacheBlk &blk) { return blk.isDirty(); }); +} + +bool +BaseCache::coalesce() const +{ + return writeAllocator && writeAllocator->coalesce(); +} + +void +BaseCache::writebackVisitor(CacheBlk &blk) +{ + if (blk.isDirty()) { + assert(blk.isValid()); + + RequestPtr request = std::make_shared( + regenerateBlkAddr(&blk), blkSize, 0, Request::funcMasterId); + + request->taskId(blk.task_id); + if (blk.isSecure()) { + request->setFlags(Request::SECURE); + } + + Packet packet(request, MemCmd::WriteReq); + packet.dataStatic(blk.data); + + memSidePort.sendFunctional(&packet); + + blk.status &= ~BlkDirty; + } +} + +void +BaseCache::invalidateVisitor(CacheBlk &blk) +{ + if (blk.isDirty()) + warn_once("Invalidating dirty cache lines. " \ + "Expect things to break.\n"); + + if (blk.isValid()) { + assert(!blk.isDirty()); + invalidateBlock(&blk); + } +} + +Tick +BaseCache::nextQueueReadyTime() const +{ + Tick nextReady = std::min(mshrQueue.nextReadyTime(), + writeBuffer.nextReadyTime()); + + // Don't signal prefetch ready time if no MSHRs available + // Will signal once enoguh MSHRs are deallocated + if (prefetcher && mshrQueue.canPrefetch()) { + nextReady = std::min(nextReady, + prefetcher->nextPrefetchReadyTime()); + } + + return nextReady; +} + + +bool +BaseCache::sendMSHRQueuePacket(MSHR* mshr) +{ + assert(mshr); + + // use request from 1st target + PacketPtr tgt_pkt = mshr->getTarget()->pkt; + + DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print()); + + // if the cache is in write coalescing mode or (additionally) in + // no allocation mode, and we have a write packet with an MSHR + // that is not a whole-line write (due to incompatible flags etc), + // then reset the write mode + if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) { + if (!mshr->isWholeLineWrite()) { + // if we are currently write coalescing, hold on the + // MSHR as many cycles extra as we need to completely + // write a cache line + if (writeAllocator->delay(mshr->blkAddr)) { + Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod(); + DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow " + "for write coalescing\n", tgt_pkt->print(), delay); + mshrQueue.delay(mshr, delay); + return false; + } else { + writeAllocator->reset(); + } + } else { + writeAllocator->resetDelay(mshr->blkAddr); + } + } + + CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure); + + // either a prefetch that is not present upstream, or a normal + // MSHR request, proceed to get the packet to send downstream + PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(), + mshr->isWholeLineWrite()); + + mshr->isForward = (pkt == nullptr); + + if (mshr->isForward) { + // not a cache block request, but a response is expected + // make copy of current packet to forward, keep current + // copy for response handling + pkt = new Packet(tgt_pkt, false, true); + assert(!pkt->isWrite()); + } + + // play it safe and append (rather than set) the sender state, + // as forwarded packets may already have existing state + pkt->pushSenderState(mshr); + + if (pkt->isClean() && blk && blk->isDirty()) { + // A cache clean opearation is 
looking for a dirty block. Mark + // the packet so that the destination xbar can determine that + // there will be a follow-up write packet as well. + pkt->setSatisfied(); + } + + if (!memSidePort.sendTimingReq(pkt)) { + // we are awaiting a retry, but we + // delete the packet and will be creating a new packet + // when we get the opportunity + delete pkt; + + // note that we have now masked any requestBus and + // schedSendEvent (we will wait for a retry before + // doing anything), and this is so even if we do not + // care about this packet and might override it before + // it gets retried + return true; + } else { + // As part of the call to sendTimingReq the packet is + // forwarded to all neighbouring caches (and any caches + // above them) as a snoop. Thus at this point we know if + // any of the neighbouring caches are responding, and if + // so, we know it is dirty, and we can determine if it is + // being passed as Modified, making our MSHR the ordering + // point + bool pending_modified_resp = !pkt->hasSharers() && + pkt->cacheResponding(); + markInService(mshr, pending_modified_resp); + + if (pkt->isClean() && blk && blk->isDirty()) { + // A cache clean opearation is looking for a dirty + // block. If a dirty block is encountered a WriteClean + // will update any copies to the path to the memory + // until the point of reference. + DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n", + __func__, pkt->print(), blk->print()); + PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), + pkt->id); + PacketList writebacks; + writebacks.push_back(wb_pkt); + doWritebacks(writebacks, 0); + } + + return false; + } +} + +bool +BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry) +{ + assert(wq_entry); + + // always a single target for write queue entries + PacketPtr tgt_pkt = wq_entry->getTarget()->pkt; + + DPRINTF(Cache, "%s: write %s\n", __func__, tgt_pkt->print()); + + // forward as is, both for evictions and uncacheable writes + if (!memSidePort.sendTimingReq(tgt_pkt)) { + // note that we have now masked any requestBus and + // schedSendEvent (we will wait for a retry before + // doing anything), and this is so even if we do not + // care about this packet and might override it before + // it gets retried + return true; + } else { + markInService(wq_entry); + return false; + } +} + +void +BaseCache::serialize(CheckpointOut &cp) const +{ + bool dirty(isDirty()); + + if (dirty) { + warn("*** The cache still contains dirty data. ***\n"); + warn(" Make sure to drain the system using the correct flags.\n"); + warn(" This checkpoint will not restore correctly " \ + "and dirty data in the cache will be lost!\n"); + } + + // Since we don't checkpoint the data in the cache, any dirty data + // will be lost when restoring from a checkpoint of a system that + // wasn't drained properly. Flag the checkpoint as invalid if the + // cache contains dirty data. + bool bad_checkpoint(dirty); + SERIALIZE_SCALAR(bad_checkpoint); +} + +void +BaseCache::unserialize(CheckpointIn &cp) +{ + bool bad_checkpoint; + UNSERIALIZE_SCALAR(bad_checkpoint); + if (bad_checkpoint) { + fatal("Restoring from checkpoints with dirty caches is not " + "supported in the classic memory system. 
Please remove any " + "caches or drain them properly before taking checkpoints.\n"); + } +} + + +BaseCache::CacheCmdStats::CacheCmdStats(BaseCache &c, + const std::string &name) + : Stats::Group(&c), cache(c), + + hits( + this, (name + "_hits").c_str(), + ("number of " + name + " hits").c_str()), + misses( + this, (name + "_misses").c_str(), + ("number of " + name + " misses").c_str()), + missLatency( + this, (name + "_miss_latency").c_str(), + ("number of " + name + " miss cycles").c_str()), + accesses( + this, (name + "_accesses").c_str(), + ("number of " + name + " accesses(hits+misses)").c_str()), + missRate( + this, (name + "_miss_rate").c_str(), + ("miss rate for " + name + " accesses").c_str()), + avgMissLatency( + this, (name + "_avg_miss_latency").c_str(), + ("average " + name + " miss latency").c_str()), + mshr_hits( + this, (name + "_mshr_hits").c_str(), + ("number of " + name + " MSHR hits").c_str()), + mshr_misses( + this, (name + "_mshr_misses").c_str(), + ("number of " + name + " MSHR misses").c_str()), + mshr_uncacheable( + this, (name + "_mshr_uncacheable").c_str(), + ("number of " + name + " MSHR uncacheable").c_str()), + mshr_miss_latency( + this, (name + "_mshr_miss_latency").c_str(), + ("number of " + name + " MSHR miss cycles").c_str()), + mshr_uncacheable_lat( + this, (name + "_mshr_uncacheable_latency").c_str(), + ("number of " + name + " MSHR uncacheable cycles").c_str()), + mshrMissRate( + this, (name + "_mshr_miss_rate").c_str(), + ("mshr miss rate for " + name + " accesses").c_str()), + avgMshrMissLatency( + this, (name + "_avg_mshr_miss_latency").c_str(), + ("average " + name + " mshr miss latency").c_str()), + avgMshrUncacheableLatency( + this, (name + "_avg_mshr_uncacheable_latency").c_str(), + ("average " + name + " mshr uncacheable latency").c_str()) +{ +} + +void +BaseCache::CacheCmdStats::regStatsFromParent() +{ + using namespace Stats; + + Stats::Group::regStats(); + System *system = cache.system; + const auto max_masters = system->maxMasters(); + + hits + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + hits.subname(i, system->getMasterName(i)); + } + + // Miss statistics + misses + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + misses.subname(i, system->getMasterName(i)); + } + + // Miss latency statistics + missLatency + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + missLatency.subname(i, system->getMasterName(i)); + } + + // access formulas + accesses.flags(total | nozero | nonan); + accesses = hits + misses; + for (int i = 0; i < max_masters; i++) { + accesses.subname(i, system->getMasterName(i)); + } + + // miss rate formulas + missRate.flags(total | nozero | nonan); + missRate = misses / accesses; + for (int i = 0; i < max_masters; i++) { + missRate.subname(i, system->getMasterName(i)); + } + + // miss latency formulas + avgMissLatency.flags(total | nozero | nonan); + avgMissLatency = missLatency / misses; + for (int i = 0; i < max_masters; i++) { + avgMissLatency.subname(i, system->getMasterName(i)); + } + + // MSHR statistics + // MSHR hit statistics + mshr_hits + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + mshr_hits.subname(i, system->getMasterName(i)); + } + + // MSHR miss statistics + mshr_misses + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + mshr_misses.subname(i, 
system->getMasterName(i)); + } -void -BaseCache::CacheSlavePort::setBlocked() -{ - assert(!blocked); - DPRINTF(CachePort, "Cache port %s blocking new requests\n", name()); - blocked = true; - // if we already scheduled a retry in this cycle, but it has not yet - // happened, cancel it - if (sendRetryEvent.scheduled()) { - owner.deschedule(sendRetryEvent); - DPRINTF(CachePort, "Cache port %s deschedule retry\n", name()); - mustSendRetry = true; + // MSHR miss latency statistics + mshr_miss_latency + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + mshr_miss_latency.subname(i, system->getMasterName(i)); } -} -void -BaseCache::CacheSlavePort::clearBlocked() -{ - assert(blocked); - DPRINTF(CachePort, "Cache port %s accepting new requests\n", name()); - blocked = false; - if (mustSendRetry) { - DPRINTF(CachePort, "Cache port %s sending retry\n", name()); - mustSendRetry = false; - // @TODO: need to find a better time (next bus cycle?) - owner.schedule(sendRetryEvent, curTick() + 1); + // MSHR uncacheable statistics + mshr_uncacheable + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + mshr_uncacheable.subname(i, system->getMasterName(i)); } -} + // MSHR miss latency statistics + mshr_uncacheable_lat + .init(max_masters) + .flags(total | nozero | nonan) + ; + for (int i = 0; i < max_masters; i++) { + mshr_uncacheable_lat.subname(i, system->getMasterName(i)); + } -void -BaseCache::init() -{ - if (!cpuSidePort->isConnected() || !memSidePort->isConnected()) - fatal("Cache ports on %s are not connected\n", name()); - cpuSidePort->sendRangeChange(); -} + // MSHR miss rate formulas + mshrMissRate.flags(total | nozero | nonan); + mshrMissRate = mshr_misses / accesses; -BaseMasterPort & -BaseCache::getMasterPort(const std::string &if_name, PortID idx) -{ - if (if_name == "mem_side") { - return *memSidePort; - } else { - return MemObject::getMasterPort(if_name, idx); + for (int i = 0; i < max_masters; i++) { + mshrMissRate.subname(i, system->getMasterName(i)); + } + + // mshrMiss latency formulas + avgMshrMissLatency.flags(total | nozero | nonan); + avgMshrMissLatency = mshr_miss_latency / mshr_misses; + for (int i = 0; i < max_masters; i++) { + avgMshrMissLatency.subname(i, system->getMasterName(i)); + } + + // mshrUncacheable latency formulas + avgMshrUncacheableLatency.flags(total | nozero | nonan); + avgMshrUncacheableLatency = mshr_uncacheable_lat / mshr_uncacheable; + for (int i = 0; i < max_masters; i++) { + avgMshrUncacheableLatency.subname(i, system->getMasterName(i)); } } -BaseSlavePort & -BaseCache::getSlavePort(const std::string &if_name, PortID idx) +BaseCache::CacheStats::CacheStats(BaseCache &c) + : Stats::Group(&c), cache(c), + + demandHits(this, "demand_hits", "number of demand (read+write) hits"), + + overallHits(this, "overall_hits", "number of overall hits"), + demandMisses(this, "demand_misses", + "number of demand (read+write) misses"), + overallMisses(this, "overall_misses", "number of overall misses"), + demandMissLatency(this, "demand_miss_latency", + "number of demand (read+write) miss cycles"), + overallMissLatency(this, "overall_miss_latency", + "number of overall miss cycles"), + demandAccesses(this, "demand_accesses", + "number of demand (read+write) accesses"), + overallAccesses(this, "overall_accesses", + "number of overall (read+write) accesses"), + demandMissRate(this, "demand_miss_rate", + "miss rate for demand accesses"), + overallMissRate(this, "overall_miss_rate", + 
"miss rate for overall accesses"), + demandAvgMissLatency(this, "demand_avg_miss_latency", + "average overall miss latency"), + overallAvgMissLatency(this, "overall_avg_miss_latency", + "average overall miss latency"), + blocked_cycles(this, "blocked_cycles", + "number of cycles access was blocked"), + blocked_causes(this, "blocked", "number of cycles access was blocked"), + avg_blocked(this, "avg_blocked_cycles", + "average number of cycles each access was blocked"), + unusedPrefetches(this, "unused_prefetches", + "number of HardPF blocks evicted w/o reference"), + writebacks(this, "writebacks", "number of writebacks"), + demandMshrHits(this, "demand_mshr_hits", + "number of demand (read+write) MSHR hits"), + overallMshrHits(this, "overall_mshr_hits", + "number of overall MSHR hits"), + demandMshrMisses(this, "demand_mshr_misses", + "number of demand (read+write) MSHR misses"), + overallMshrMisses(this, "overall_mshr_misses", + "number of overall MSHR misses"), + overallMshrUncacheable(this, "overall_mshr_uncacheable_misses", + "number of overall MSHR uncacheable misses"), + demandMshrMissLatency(this, "demand_mshr_miss_latency", + "number of demand (read+write) MSHR miss cycles"), + overallMshrMissLatency(this, "overall_mshr_miss_latency", + "number of overall MSHR miss cycles"), + overallMshrUncacheableLatency(this, "overall_mshr_uncacheable_latency", + "number of overall MSHR uncacheable cycles"), + demandMshrMissRate(this, "demand_mshr_miss_rate", + "mshr miss rate for demand accesses"), + overallMshrMissRate(this, "overall_mshr_miss_rate", + "mshr miss rate for overall accesses"), + demandAvgMshrMissLatency(this, "demand_avg_mshr_miss_latency", + "average overall mshr miss latency"), + overallAvgMshrMissLatency(this, "overall_avg_mshr_miss_latency", + "average overall mshr miss latency"), + overallAvgMshrUncacheableLatency( + this, "overall_avg_mshr_uncacheable_latency", + "average overall mshr uncacheable latency"), + replacements(this, "replacements", "number of replacements"), + + dataExpansions(this, "data_expansions", "number of data expansions"), + cmd(MemCmd::NUM_MEM_CMDS) { - if (if_name == "cpu_side") { - return *cpuSidePort; - } else { - return MemObject::getSlavePort(if_name, idx); - } + for (int idx = 0; idx < MemCmd::NUM_MEM_CMDS; ++idx) + cmd[idx].reset(new CacheCmdStats(c, MemCmd(idx).toString())); } void -BaseCache::regStats() +BaseCache::CacheStats::regStats() { using namespace Stats; - // Hit statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); + Stats::Group::regStats(); - hits[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_hits") - .desc("number of " + cstr + " hits") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - hits[access_idx].subname(i, system->getMasterName(i)); - } - } + System *system = cache.system; + const auto max_masters = system->maxMasters(); + + for (auto &cs : cmd) + cs->regStatsFromParent(); // These macros make it easier to sum the right subset of commands and // to change the subset of commands that are considered "demand" vs // "non-demand" -#define SUM_DEMAND(s) \ - (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::ReadExReq]) +#define SUM_DEMAND(s) \ + (cmd[MemCmd::ReadReq]->s + cmd[MemCmd::WriteReq]->s + \ + cmd[MemCmd::WriteLineReq]->s + cmd[MemCmd::ReadExReq]->s + \ + cmd[MemCmd::ReadCleanReq]->s + cmd[MemCmd::ReadSharedReq]->s) // should writebacks be included here? 
prior code was inconsistent... -#define SUM_NON_DEMAND(s) \ - (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq]) +#define SUM_NON_DEMAND(s) \ + (cmd[MemCmd::SoftPFReq]->s + cmd[MemCmd::HardPFReq]->s + \ + cmd[MemCmd::SoftPFExReq]->s) - demandHits - .name(name() + ".demand_hits") - .desc("number of demand (read+write) hits") - .flags(total | nozero | nonan) - ; + demandHits.flags(total | nozero | nonan); demandHits = SUM_DEMAND(hits); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandHits.subname(i, system->getMasterName(i)); } - overallHits - .name(name() + ".overall_hits") - .desc("number of overall hits") - .flags(total | nozero | nonan) - ; + overallHits.flags(total | nozero | nonan); overallHits = demandHits + SUM_NON_DEMAND(hits); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallHits.subname(i, system->getMasterName(i)); } - // Miss statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - misses[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_misses") - .desc("number of " + cstr + " misses") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - misses[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMisses - .name(name() + ".demand_misses") - .desc("number of demand (read+write) misses") - .flags(total | nozero | nonan) - ; + demandMisses.flags(total | nozero | nonan); demandMisses = SUM_DEMAND(misses); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMisses.subname(i, system->getMasterName(i)); } - overallMisses - .name(name() + ".overall_misses") - .desc("number of overall misses") - .flags(total | nozero | nonan) - ; + overallMisses.flags(total | nozero | nonan); overallMisses = demandMisses + SUM_NON_DEMAND(misses); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMisses.subname(i, system->getMasterName(i)); } - // Miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - missLatency[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_miss_latency") - .desc("number of " + cstr + " miss cycles") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - missLatency[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMissLatency - .name(name() + ".demand_miss_latency") - .desc("number of demand (read+write) miss cycles") - .flags(total | nozero | nonan) - ; + demandMissLatency.flags(total | nozero | nonan); demandMissLatency = SUM_DEMAND(missLatency); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMissLatency.subname(i, system->getMasterName(i)); } - overallMissLatency - .name(name() + ".overall_miss_latency") - .desc("number of overall miss cycles") - .flags(total | nozero | nonan) - ; + overallMissLatency.flags(total | nozero | nonan); overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMissLatency.subname(i, system->getMasterName(i)); } - // access formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - accesses[access_idx] - .name(name() + "." + cstr + "_accesses") - .desc("number of " + cstr + " accesses(hits+misses)") - .flags(total | nozero | nonan) - ; - accesses[access_idx] = hits[access_idx] + misses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - accesses[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandAccesses - .name(name() + ".demand_accesses") - .desc("number of demand (read+write) accesses") - .flags(total | nozero | nonan) - ; + demandAccesses.flags(total | nozero | nonan); demandAccesses = demandHits + demandMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandAccesses.subname(i, system->getMasterName(i)); } - overallAccesses - .name(name() + ".overall_accesses") - .desc("number of overall (read+write) accesses") - .flags(total | nozero | nonan) - ; + overallAccesses.flags(total | nozero | nonan); overallAccesses = overallHits + overallMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAccesses.subname(i, system->getMasterName(i)); } - // miss rate formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - missRate[access_idx] - .name(name() + "." 
+ cstr + "_miss_rate") - .desc("miss rate for " + cstr + " accesses") - .flags(total | nozero | nonan) - ; - missRate[access_idx] = misses[access_idx] / accesses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - missRate[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMissRate - .name(name() + ".demand_miss_rate") - .desc("miss rate for demand accesses") - .flags(total | nozero | nonan) - ; + demandMissRate.flags(total | nozero | nonan); demandMissRate = demandMisses / demandAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMissRate.subname(i, system->getMasterName(i)); } - overallMissRate - .name(name() + ".overall_miss_rate") - .desc("miss rate for overall accesses") - .flags(total | nozero | nonan) - ; + overallMissRate.flags(total | nozero | nonan); overallMissRate = overallMisses / overallAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMissRate.subname(i, system->getMasterName(i)); } - // miss latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMissLatency[access_idx] - .name(name() + "." + cstr + "_avg_miss_latency") - .desc("average " + cstr + " miss latency") - .flags(total | nozero | nonan) - ; - avgMissLatency[access_idx] = - missLatency[access_idx] / misses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - avgMissLatency[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandAvgMissLatency - .name(name() + ".demand_avg_miss_latency") - .desc("average overall miss latency") - .flags(total | nozero | nonan) - ; + demandAvgMissLatency.flags(total | nozero | nonan); demandAvgMissLatency = demandMissLatency / demandMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandAvgMissLatency.subname(i, system->getMasterName(i)); } - overallAvgMissLatency - .name(name() + ".overall_avg_miss_latency") - .desc("average overall miss latency") - .flags(total | nozero | nonan) - ; + overallAvgMissLatency.flags(total | nozero | nonan); overallAvgMissLatency = overallMissLatency / overallMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAvgMissLatency.subname(i, system->getMasterName(i)); } blocked_cycles.init(NUM_BLOCKED_CAUSES); blocked_cycles - .name(name() + ".blocked_cycles") - .desc("number of cycles access was blocked") .subname(Blocked_NoMSHRs, "no_mshrs") .subname(Blocked_NoTargets, "no_targets") ; @@ -389,397 +2188,337 @@ BaseCache::regStats() blocked_causes.init(NUM_BLOCKED_CAUSES); blocked_causes - .name(name() + ".blocked") - .desc("number of cycles access was blocked") .subname(Blocked_NoMSHRs, "no_mshrs") .subname(Blocked_NoTargets, "no_targets") ; avg_blocked - .name(name() + ".avg_blocked_cycles") - .desc("average number of cycles each access was blocked") .subname(Blocked_NoMSHRs, "no_mshrs") .subname(Blocked_NoTargets, "no_targets") ; - avg_blocked = blocked_cycles / blocked_causes; - fastWrites - .name(name() + ".fast_writes") - .desc("number of fast writes performed") - ; - - cacheCopies - .name(name() + ".cache_copies") - .desc("number of cache copies performed") - ; + unusedPrefetches.flags(nozero); writebacks - .init(system->maxMasters()) - .name(name() + ".writebacks") - .desc("number of writebacks") + .init(max_masters) .flags(total | nozero | nonan) ; - for (int i = 
0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { writebacks.subname(i, system->getMasterName(i)); } - // MSHR statistics - // MSHR hit statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_hits[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_mshr_hits") - .desc("number of " + cstr + " MSHR hits") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_hits[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrHits - .name(name() + ".demand_mshr_hits") - .desc("number of demand (read+write) MSHR hits") - .flags(total | nozero | nonan) - ; + demandMshrHits.flags(total | nozero | nonan); demandMshrHits = SUM_DEMAND(mshr_hits); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrHits.subname(i, system->getMasterName(i)); } - overallMshrHits - .name(name() + ".overall_mshr_hits") - .desc("number of overall MSHR hits") - .flags(total | nozero | nonan) - ; + overallMshrHits.flags(total | nozero | nonan); overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrHits.subname(i, system->getMasterName(i)); } - // MSHR miss statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_misses[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_mshr_misses") - .desc("number of " + cstr + " MSHR misses") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_misses[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrMisses - .name(name() + ".demand_mshr_misses") - .desc("number of demand (read+write) MSHR misses") - .flags(total | nozero | nonan) - ; + demandMshrMisses.flags(total | nozero | nonan); demandMshrMisses = SUM_DEMAND(mshr_misses); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrMisses.subname(i, system->getMasterName(i)); } - overallMshrMisses - .name(name() + ".overall_mshr_misses") - .desc("number of overall MSHR misses") - .flags(total | nozero | nonan) - ; + overallMshrMisses.flags(total | nozero | nonan); overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrMisses.subname(i, system->getMasterName(i)); } - // MSHR miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_miss_latency[access_idx] - .init(system->maxMasters()) - .name(name() + "." 
+ cstr + "_mshr_miss_latency") - .desc("number of " + cstr + " MSHR miss cycles") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_miss_latency[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrMissLatency - .name(name() + ".demand_mshr_miss_latency") - .desc("number of demand (read+write) MSHR miss cycles") - .flags(total | nozero | nonan) - ; + demandMshrMissLatency.flags(total | nozero | nonan); demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrMissLatency.subname(i, system->getMasterName(i)); } - overallMshrMissLatency - .name(name() + ".overall_mshr_miss_latency") - .desc("number of overall MSHR miss cycles") - .flags(total | nozero | nonan) - ; + overallMshrMissLatency.flags(total | nozero | nonan); overallMshrMissLatency = demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrMissLatency.subname(i, system->getMasterName(i)); } - // MSHR uncacheable statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_uncacheable[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_mshr_uncacheable") - .desc("number of " + cstr + " MSHR uncacheable") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_uncacheable[access_idx].subname(i, system->getMasterName(i)); - } - } - - overallMshrUncacheable - .name(name() + ".overall_mshr_uncacheable_misses") - .desc("number of overall MSHR uncacheable misses") - .flags(total | nozero | nonan) - ; + overallMshrUncacheable.flags(total | nozero | nonan); overallMshrUncacheable = SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrUncacheable.subname(i, system->getMasterName(i)); } - // MSHR miss latency statistics - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshr_uncacheable_lat[access_idx] - .init(system->maxMasters()) - .name(name() + "." + cstr + "_mshr_uncacheable_latency") - .desc("number of " + cstr + " MSHR uncacheable cycles") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_uncacheable_lat[access_idx].subname(i, system->getMasterName(i)); - } - } - overallMshrUncacheableLatency - .name(name() + ".overall_mshr_uncacheable_latency") - .desc("number of overall MSHR uncacheable cycles") - .flags(total | nozero | nonan) - ; + overallMshrUncacheableLatency.flags(total | nozero | nonan); overallMshrUncacheableLatency = SUM_DEMAND(mshr_uncacheable_lat) + SUM_NON_DEMAND(mshr_uncacheable_lat); - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrUncacheableLatency.subname(i, system->getMasterName(i)); } -#if 0 - // MSHR access formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshrAccesses[access_idx] - .name(name() + "." 
+ cstr + "_mshr_accesses") - .desc("number of " + cstr + " mshr accesses(hits+misses)") - .flags(total | nozero | nonan) - ; - mshrAccesses[access_idx] = - mshr_hits[access_idx] + mshr_misses[access_idx] - + mshr_uncacheable[access_idx]; - } - - demandMshrAccesses - .name(name() + ".demand_mshr_accesses") - .desc("number of demand (read+write) mshr accesses") - .flags(total | nozero | nonan) - ; - demandMshrAccesses = demandMshrHits + demandMshrMisses; - - overallMshrAccesses - .name(name() + ".overall_mshr_accesses") - .desc("number of overall (read+write) mshr accesses") - .flags(total | nozero | nonan) - ; - overallMshrAccesses = overallMshrHits + overallMshrMisses - + overallMshrUncacheable; -#endif - - // MSHR miss rate formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - mshrMissRate[access_idx] - .name(name() + "." + cstr + "_mshr_miss_rate") - .desc("mshr miss rate for " + cstr + " accesses") - .flags(total | nozero | nonan) - ; - mshrMissRate[access_idx] = - mshr_misses[access_idx] / accesses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - mshrMissRate[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandMshrMissRate - .name(name() + ".demand_mshr_miss_rate") - .desc("mshr miss rate for demand accesses") - .flags(total | nozero | nonan) - ; + demandMshrMissRate.flags(total | nozero | nonan); demandMshrMissRate = demandMshrMisses / demandAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandMshrMissRate.subname(i, system->getMasterName(i)); } - overallMshrMissRate - .name(name() + ".overall_mshr_miss_rate") - .desc("mshr miss rate for overall accesses") - .flags(total | nozero | nonan) - ; + overallMshrMissRate.flags(total | nozero | nonan); overallMshrMissRate = overallMshrMisses / overallAccesses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallMshrMissRate.subname(i, system->getMasterName(i)); } - // mshrMiss latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); - - avgMshrMissLatency[access_idx] - .name(name() + "." 
+ cstr + "_avg_mshr_miss_latency") - .desc("average " + cstr + " mshr miss latency") - .flags(total | nozero | nonan) - ; - avgMshrMissLatency[access_idx] = - mshr_miss_latency[access_idx] / mshr_misses[access_idx]; - - for (int i = 0; i < system->maxMasters(); i++) { - avgMshrMissLatency[access_idx].subname(i, system->getMasterName(i)); - } - } - - demandAvgMshrMissLatency - .name(name() + ".demand_avg_mshr_miss_latency") - .desc("average overall mshr miss latency") - .flags(total | nozero | nonan) - ; + demandAvgMshrMissLatency.flags(total | nozero | nonan); demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { demandAvgMshrMissLatency.subname(i, system->getMasterName(i)); } - overallAvgMshrMissLatency - .name(name() + ".overall_avg_mshr_miss_latency") - .desc("average overall mshr miss latency") - .flags(total | nozero | nonan) - ; + overallAvgMshrMissLatency.flags(total | nozero | nonan); overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses; - for (int i = 0; i < system->maxMasters(); i++) { + for (int i = 0; i < max_masters; i++) { overallAvgMshrMissLatency.subname(i, system->getMasterName(i)); } - // mshrUncacheable latency formulas - for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) { - MemCmd cmd(access_idx); - const string &cstr = cmd.toString(); + overallAvgMshrUncacheableLatency.flags(total | nozero | nonan); + overallAvgMshrUncacheableLatency = + overallMshrUncacheableLatency / overallMshrUncacheable; + for (int i = 0; i < max_masters; i++) { + overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i)); + } + + dataExpansions.flags(nozero | nonan); +} + +void +BaseCache::regProbePoints() +{ + ppHit = new ProbePointArg(this->getProbeManager(), "Hit"); + ppMiss = new ProbePointArg(this->getProbeManager(), "Miss"); + ppFill = new ProbePointArg(this->getProbeManager(), "Fill"); +} + +/////////////// +// +// CpuSidePort +// +/////////////// +bool +BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt) +{ + // Snoops shouldn't happen when bypassing caches + assert(!cache->system->bypassCaches()); - avgMshrUncacheableLatency[access_idx] - .name(name() + "." 
+ cstr + "_avg_mshr_uncacheable_latency") - .desc("average " + cstr + " mshr uncacheable latency") - .flags(total | nozero | nonan) - ; - avgMshrUncacheableLatency[access_idx] = - mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx]; + assert(pkt->isResponse()); - for (int i = 0; i < system->maxMasters(); i++) { - avgMshrUncacheableLatency[access_idx].subname(i, system->getMasterName(i)); - } + // Express snoop responses from master to slave, e.g., from L1 to L2 + cache->recvTimingSnoopResp(pkt); + return true; +} + + +bool +BaseCache::CpuSidePort::tryTiming(PacketPtr pkt) +{ + if (cache->system->bypassCaches() || pkt->isExpressSnoop()) { + // always let express snoop packets through even if blocked + return true; + } else if (blocked || mustSendRetry) { + // either already committed to send a retry, or blocked + mustSendRetry = true; + return false; } + mustSendRetry = false; + return true; +} - overallAvgMshrUncacheableLatency - .name(name() + ".overall_avg_mshr_uncacheable_latency") - .desc("average overall mshr uncacheable latency") - .flags(total | nozero | nonan) - ; - overallAvgMshrUncacheableLatency = overallMshrUncacheableLatency / overallMshrUncacheable; - for (int i = 0; i < system->maxMasters(); i++) { - overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i)); +bool +BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt) +{ + assert(pkt->isRequest()); + + if (cache->system->bypassCaches()) { + // Just forward the packet if caches are disabled. + // @todo This should really enqueue the packet rather + bool M5_VAR_USED success = cache->memSidePort.sendTimingReq(pkt); + assert(success); + return true; + } else if (tryTiming(pkt)) { + cache->recvTimingReq(pkt); + return true; } + return false; +} - mshr_cap_events - .init(system->maxMasters()) - .name(name() + ".mshr_cap_events") - .desc("number of times MSHR cap was activated") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - mshr_cap_events.subname(i, system->getMasterName(i)); +Tick +BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt) +{ + if (cache->system->bypassCaches()) { + // Forward the request if the system is in cache bypass mode. + return cache->memSidePort.sendAtomic(pkt); + } else { + return cache->recvAtomic(pkt); } +} - //software prefetching stats - soft_prefetch_mshr_full - .init(system->maxMasters()) - .name(name() + ".soft_prefetch_mshr_full") - .desc("number of mshr full events for SW prefetching instrutions") - .flags(total | nozero | nonan) - ; - for (int i = 0; i < system->maxMasters(); i++) { - soft_prefetch_mshr_full.subname(i, system->getMasterName(i)); +void +BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt) +{ + if (cache->system->bypassCaches()) { + // The cache should be flushed if we are in cache bypass mode, + // so we don't need to check if we need to update anything. 
+ cache->memSidePort.sendFunctional(pkt); + return; } - mshr_no_allocate_misses - .name(name() +".no_allocate_misses") - .desc("Number of misses that were no-allocate") - ; + // functional request + cache->functionalAccess(pkt, true); +} + +AddrRangeList +BaseCache::CpuSidePort::getAddrRanges() const +{ + return cache->getAddrRanges(); +} + +BaseCache:: +CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache, + const std::string &_label) + : CacheSlavePort(_name, _cache, _label), cache(_cache) +{ +} + +/////////////// +// +// MemSidePort +// +/////////////// +bool +BaseCache::MemSidePort::recvTimingResp(PacketPtr pkt) +{ + cache->recvTimingResp(pkt); + return true; +} + +// Express snooping requests to memside port +void +BaseCache::MemSidePort::recvTimingSnoopReq(PacketPtr pkt) +{ + // Snoops shouldn't happen when bypassing caches + assert(!cache->system->bypassCaches()); + + // handle snooping requests + cache->recvTimingSnoopReq(pkt); +} + +Tick +BaseCache::MemSidePort::recvAtomicSnoop(PacketPtr pkt) +{ + // Snoops shouldn't happen when bypassing caches + assert(!cache->system->bypassCaches()); + + return cache->recvAtomicSnoop(pkt); +} + +void +BaseCache::MemSidePort::recvFunctionalSnoop(PacketPtr pkt) +{ + // Snoops shouldn't happen when bypassing caches + assert(!cache->system->bypassCaches()); + + // functional snoop (note that in contrast to atomic we don't have + // a specific functionalSnoop method, as they have the same + // behaviour regardless) + cache->functionalAccess(pkt, false); } -unsigned int -BaseCache::drain(DrainManager *dm) +void +BaseCache::CacheReqPacketQueue::sendDeferredPacket() { - int count = memSidePort->drain(dm) + cpuSidePort->drain(dm) + - mshrQueue.drain(dm) + writeBuffer.drain(dm); + // sanity check + assert(!waitingOnRetry); - // Set status - if (count != 0) { - setDrainState(Drainable::Draining); - DPRINTF(Drain, "Cache not drained\n"); - return count; + // there should never be any deferred request packets in the + // queue, instead we resly on the cache to provide the packets + // from the MSHR queue or write queue + assert(deferredPacketReadyTime() == MaxTick); + + // check for request packets (requests & writebacks) + QueueEntry* entry = cache.getNextQueueEntry(); + + if (!entry) { + // can happen if e.g. we attempt a writeback and fail, but + // before the retry, the writeback is eliminated because + // we snoop another cache's ReadEx. 
+ } else { + // let our snoop responses go first if there are responses to + // the same addresses + if (checkConflictingSnoop(entry->getTarget()->pkt)) { + return; + } + waitingOnRetry = entry->sendPacket(cache); } - setDrainState(Drainable::Drained); - return 0; + // if we succeeded and are not waiting for a retry, schedule the + // next send considering when the next queue is ready, note that + // snoop responses have their own packet queue and thus schedule + // their own events + if (!waitingOnRetry) { + schedSendEvent(cache.nextQueueReadyTime()); + } } -BaseCache * -BaseCacheParams::create() +BaseCache::MemSidePort::MemSidePort(const std::string &_name, + BaseCache *_cache, + const std::string &_label) + : CacheMasterPort(_name, _cache, _reqQueue, _snoopRespQueue), + _reqQueue(*_cache, *this, _snoopRespQueue, _label), + _snoopRespQueue(*_cache, *this, true, _label), cache(_cache) { - int numSets = size / (assoc * block_size); +} - if (numSets == 1) { - FALRU *tags = new FALRU(block_size, size, hit_latency); - return new Cache(this, tags); +void +WriteAllocator::updateMode(Addr write_addr, unsigned write_size, + Addr blk_addr) +{ + // check if we are continuing where the last write ended + if (nextAddr == write_addr) { + delayCtr[blk_addr] = delayThreshold; + // stop if we have already saturated + if (mode != WriteMode::NO_ALLOCATE) { + byteCount += write_size; + // switch to streaming mode if we have passed the lower + // threshold + if (mode == WriteMode::ALLOCATE && + byteCount > coalesceLimit) { + mode = WriteMode::COALESCE; + DPRINTF(Cache, "Switched to write coalescing\n"); + } else if (mode == WriteMode::COALESCE && + byteCount > noAllocateLimit) { + // and continue and switch to non-allocating mode if we + // pass the upper threshold + mode = WriteMode::NO_ALLOCATE; + DPRINTF(Cache, "Switched to write-no-allocate\n"); + } + } } else { - LRU *tags = new LRU(numSets, block_size, assoc, hit_latency); - return new Cache(this, tags); + // we did not see a write matching the previous one, start + // over again + byteCount = write_size; + mode = WriteMode::ALLOCATE; + resetDelay(blk_addr); } + nextAddr = write_addr + write_size; +} + +WriteAllocator* +WriteAllocatorParams::create() +{ + return new WriteAllocator(this); }
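
Editor's note: the snippet below is a minimal, standalone sketch of the three-mode write policy that WriteAllocator::updateMode implements above, not part of the patch itself. The threshold values, the type name WriteStreakTracker and the main() driver are hypothetical; in gem5 the limits come from WriteAllocatorParams, and the per-block delay counters are omitted here. It only illustrates how a long run of back-to-back writes escalates from allocating, to coalescing, to write-no-allocate, and how a non-sequential write resets the streak.

// Standalone sketch (assumptions noted above); compile with any C++11 compiler.
#include <cstdint>
#include <iostream>

enum class WriteMode { ALLOCATE, COALESCE, NO_ALLOCATE };

struct WriteStreakTracker
{
    // Hypothetical thresholds, in bytes of consecutive write data.
    static constexpr uint64_t coalesceLimit = 4 * 64;    // ~4 cache lines
    static constexpr uint64_t noAllocateLimit = 12 * 64; // ~12 cache lines

    WriteMode mode = WriteMode::ALLOCATE;
    uint64_t byteCount = 0;
    uint64_t nextAddr = 0;

    void update(uint64_t write_addr, unsigned write_size)
    {
        if (nextAddr == write_addr) {
            // Sequential continuation: grow the streak unless already
            // saturated, and possibly escalate ALLOCATE -> COALESCE
            // -> NO_ALLOCATE once the thresholds are crossed.
            if (mode != WriteMode::NO_ALLOCATE) {
                byteCount += write_size;
                if (mode == WriteMode::ALLOCATE && byteCount > coalesceLimit)
                    mode = WriteMode::COALESCE;
                else if (mode == WriteMode::COALESCE &&
                         byteCount > noAllocateLimit)
                    mode = WriteMode::NO_ALLOCATE;
            }
        } else {
            // The streak is broken: start over in allocating mode.
            byteCount = write_size;
            mode = WriteMode::ALLOCATE;
        }
        nextAddr = write_addr + write_size;
    }
};

int main()
{
    WriteStreakTracker t;
    // A memset-like burst of 64-byte writes eventually stops allocating.
    for (uint64_t addr = 0; addr < 64 * 32; addr += 64)
        t.update(addr, 64);
    std::cout << "mode after streak: " << static_cast<int>(t.mode) << "\n";
    return 0;
}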
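Editor's note: the SUM_DEMAND and SUM_NON_DEMAND macros in CacheStats::regStats above fold per-command counters into the demand and overall aggregates, from which the miss-rate formulas are derived. The sketch below restates that composition in plain standalone C++ with made-up counter values; it is not gem5's Stats framework, only an illustration of which commands count as demand traffic versus prefetches.

// Standalone illustration (made-up numbers, not gem5 code).
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>

int main()
{
    // Per-command {hits, misses}, analogous to the per-MemCmd vectors.
    std::map<std::string, std::pair<uint64_t, uint64_t>> cmds = {
        {"ReadReq",       {900, 100}},
        {"WriteReq",      {450,  50}},
        {"WriteLineReq",  {  0,  10}},
        {"ReadExReq",     { 80,  20}},
        {"ReadCleanReq",  { 40,  10}},
        {"ReadSharedReq", { 40,  10}},
        // non-demand (prefetch) commands
        {"SoftPFReq",     { 20,  30}},
        {"SoftPFExReq",   {  5,   5}},
        {"HardPFReq",     { 60,  40}},
    };

    const char *demand_cmds[] = {"ReadReq", "WriteReq", "WriteLineReq",
                                 "ReadExReq", "ReadCleanReq", "ReadSharedReq"};

    uint64_t demand_hits = 0, demand_misses = 0;
    for (const char *c : demand_cmds) {
        demand_hits += cmds[c].first;      // SUM_DEMAND(hits)
        demand_misses += cmds[c].second;   // SUM_DEMAND(misses)
    }

    uint64_t overall_hits = 0, overall_misses = 0;
    for (const auto &kv : cmds) {          // demand + SUM_NON_DEMAND(...)
        overall_hits += kv.second.first;
        overall_misses += kv.second.second;
    }

    // miss_rate = misses / accesses, accesses = hits + misses
    double demand_miss_rate =
        double(demand_misses) / double(demand_hits + demand_misses);
    double overall_miss_rate =
        double(overall_misses) / double(overall_hits + overall_misses);

    std::cout << "demand miss rate:  " << demand_miss_rate << "\n"
              << "overall miss rate: " << overall_miss_rate << "\n";
    return 0;
}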