X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmem%2Fcache%2Fcache_impl.hh;h=8d2806b8d2aeb96cec2da1ac9f8aabee02bdff79;hb=71aca6d29e686ecdec2828c8be1989f74d9b28d3;hp=c1b01d6762ed70a278561bd282a7e4035e1293e4;hpb=0cbcb715e0f6f2f7b1338d37e641ef931247748f;p=gem5.git diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index c1b01d676..8d2806b8d 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -1,5 +1,18 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2002-2005 The Regents of The University of Michigan + * Copyright (c) 2010 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,37 +50,37 @@ * Cache definitions. */ -#include "sim/host.hh" +#include "base/fast_alloc.hh" #include "base/misc.hh" - +#include "base/range.hh" +#include "base/types.hh" +#include "mem/cache/blk.hh" #include "mem/cache/cache.hh" -#include "mem/cache/cache_blk.hh" -#include "mem/cache/miss/mshr.hh" -#include "mem/cache/prefetch/base_prefetcher.hh" - -#include "sim/sim_exit.hh" // for SimExitEvent - +#include "mem/cache/mshr.hh" +#include "mem/cache/prefetch/base.hh" +#include "sim/sim_exit.hh" template -Cache::Cache(const std::string &_name, - Cache::Params ¶ms) - : BaseCache(_name, params.baseParams), - prefetchAccess(params.prefetchAccess), - tags(params.tags), - prefetcher(params.prefetcher), - doFastWrites(params.doFastWrites), - prefetchMiss(params.prefetchMiss) +Cache::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf) + : BaseCache(p), + tags(tags), + prefetcher(pf), + doFastWrites(true), + prefetchOnAccess(p->prefetch_on_access) { tempBlock = new BlkType(); tempBlock->data = new uint8_t[blkSize]; - cpuSidePort = new CpuSidePort(_name + "-cpu_side_port", this); - memSidePort = new MemSidePort(_name + "-mem_side_port", this); + cpuSidePort = new CpuSidePort(p->name + "-cpu_side_port", this, + "CpuSidePort"); + memSidePort = new MemSidePort(p->name + "-mem_side_port", this, + "MemSidePort"); cpuSidePort->setOtherPort(memSidePort); memSidePort->setOtherPort(cpuSidePort); tags->setCache(this); - prefetcher->setCache(this); + if (prefetcher) + prefetcher->setCache(this); } template @@ -76,7 +89,8 @@ Cache::regStats() { BaseCache::regStats(); tags->regStats(name()); - prefetcher->regStats(name()); + if (prefetcher) + prefetcher->regStats(name()); } template @@ -88,7 +102,11 @@ Cache::getPort(const std::string &if_name, int idx) } else if (if_name == "mem_side") { return memSidePort; } else if (if_name == "functional") { - return new CpuSidePort(name() + "-cpu_side_funcport", this); + CpuSidePort *funcPort = + new CpuSidePort(name() + "-cpu_side_funcport", this, + "CpuSideFuncPort"); + funcPort->setOtherPort(memSidePort); + return funcPort; } else { panic("Port name %s unrecognized\n", if_name); } @@ -138,23 +156,27 @@ Cache::cmpAndSwap(BlkType *blk, PacketPtr pkt) panic("Invalid size for conditional read/write\n"); } - if (overwrite_mem) + if (overwrite_mem) { std::memcpy(blk_data, &overwrite_val, pkt->getSize()); + blk->status |= BlkDirty; + } } template void -Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) +Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk, + bool deferred_response, + bool pending_downgrade) { - assert(blk); + assert(blk && blk->isValid()); // Occasionally this is not true... if we are a lower-level cache // satisfying a string of Read and ReadEx requests from // upper-level caches, a Read will mark the block as shared but we // can satisfy a following ReadEx anyway since we can rely on the // Read requester(s) to have buffered the ReadEx snoop and to // invalidate their blocks after receiving them. - // assert(pkt->needsExclusive() ? blk->isWritable() : blk->isValid()); + // assert(!pkt->needsExclusive() || blk->isWritable()); assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize); // Check RMW operations first since both isRead() and @@ -163,11 +185,11 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) cmpAndSwap(blk, pkt); } else if (pkt->isWrite()) { if (blk->checkWrite(pkt)) { - blk->status |= BlkDirty; pkt->writeDataToBlock(blk->data, blkSize); + blk->status |= BlkDirty; } } else if (pkt->isRead()) { - if (pkt->isLocked()) { + if (pkt->isLLSC()) { blk->trackLoadLocked(pkt); } pkt->setDataFromBlock(blk->data, blkSize); @@ -175,20 +197,50 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) // special handling for coherent block requests from // upper-level caches if (pkt->needsExclusive()) { - // on ReadExReq we give up our copy + // if we have a dirty copy, make sure the recipient + // keeps it marked dirty + if (blk->isDirty()) { + pkt->assertMemInhibit(); + } + // on ReadExReq we give up our copy unconditionally tags->invalidateBlk(blk); + } else if (blk->isWritable() && !pending_downgrade + && !pkt->sharedAsserted()) { + // we can give the requester an exclusive copy (by not + // asserting shared line) on a read request if: + // - we have an exclusive copy at this level (& below) + // - we don't have a pending snoop from below + // signaling another read request + // - no other cache above has a copy (otherwise it + // would have asseretd shared line on request) + + if (blk->isDirty()) { + // special considerations if we're owner: + if (!deferred_response) { + // if we are responding immediately and can + // signal that we're transferring ownership + // along with exclusivity, do so + pkt->assertMemInhibit(); + blk->status &= ~BlkDirty; + } else { + // if we're responding after our own miss, + // there's a window where the recipient didn't + // know it was getting ownership and may not + // have responded to snoops correctly, so we + // can't pass off ownership *or* exclusivity + pkt->assertShared(); + } + } } else { - // on ReadReq we create shareable copies here and in - // the requester + // otherwise only respond with a shared copy pkt->assertShared(); - blk->status &= ~BlkWritable; } } } else { // Not a read or write... must be an upgrade. it's OK // to just ack those as long as we have an exclusive // copy at this level. - assert(pkt->cmd == MemCmd::UpgradeReq); + assert(pkt->isUpgrade()); tags->invalidateBlk(blk); } } @@ -203,9 +255,9 @@ Cache::satisfyCpuSideRequest(PacketPtr pkt, BlkType *blk) template void -Cache::markInService(MSHR *mshr) +Cache::markInService(MSHR *mshr, PacketPtr pkt) { - markInServiceInternal(mshr); + markInServiceInternal(mshr, pkt); #if 0 if (mshr->originalCmd == MemCmd::HardPFReq) { DPRINTF(HWPrefetch, "%s:Marking a HW_PF in service\n", @@ -250,78 +302,83 @@ Cache::squash(int threadNum) template bool -Cache::access(PacketPtr pkt, BlkType *&blk, int &lat) +Cache::access(PacketPtr pkt, BlkType *&blk, + int &lat, PacketList &writebacks) { - if (pkt->req->isUncacheable()) { + if (pkt->req->isUncacheable()) { + if (pkt->req->isClrex()) { + tags->clearLocks(); + } else { + blk = tags->findBlock(pkt->getAddr()); + if (blk != NULL) { + tags->invalidateBlk(blk); + } + } + blk = NULL; lat = hitLatency; return false; } - bool satisfied = false; // assume the worst - blk = tags->findBlock(pkt->getAddr(), lat); + int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + blk = tags->accessBlock(pkt->getAddr(), lat, id); - if (prefetchAccess) { - //We are determining prefetches on access stream, call prefetcher - prefetcher->handleMiss(pkt, curTick); - } - - DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), - (blk) ? "hit" : "miss"); + DPRINTF(Cache, "%s%s %x %s\n", pkt->cmdString(), + pkt->req->isInstFetch() ? " (ifetch)" : "", + pkt->getAddr(), (blk) ? "hit" : "miss"); if (blk != NULL) { - // HIT - if (blk->isPrefetch()) { - //Signal that this was a hit under prefetch (no need for - //use prefetch (only can get here if true) - DPRINTF(HWPrefetch, "Hit a block that was prefetched\n"); - blk->status &= ~BlkHWPrefetched; - if (prefetchMiss) { - //If we are using the miss stream, signal the - //prefetcher otherwise the access stream would have - //already signaled this hit - prefetcher->handleMiss(pkt, curTick); - } - } - if (pkt->needsExclusive() ? blk->isWritable() : blk->isValid()) { + if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) { // OK to satisfy access - hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - satisfied = true; + incHitCount(pkt, id); satisfyCpuSideRequest(pkt, blk); - } else if (pkt->cmd == MemCmd::Writeback) { - // special case: writeback to read-only block (e.g., from - // L1 into L2). since we're really just passing ownership - // from one cache to another, we can update this cache to - // be the owner without making the block writeable - assert(!blk->isWritable() /* && !blk->isDirty() */); - assert(blkSize == pkt->getSize()); - std::memcpy(blk->data, pkt->getPtr(), blkSize); - blk->status |= BlkDirty; - satisfied = true; - // nothing else to do; writeback doesn't expect response - assert(!pkt->needsResponse()); - } else { - // permission violation... nothing to do here, leave unsatisfied - // for statistics purposes this counts like a complete miss - incMissCount(pkt); + return true; } - } else { - // complete miss (no matching block) - incMissCount(pkt); + } - if (pkt->isLocked() && pkt->isWrite()) { - // miss on store conditional... just give up now - pkt->req->setExtraData(0); - satisfied = true; + // Can't satisfy access normally... either no block (blk == NULL) + // or have block but need exclusive & only have shared. + + // Writeback handling is special case. We can write the block + // into the cache without having a writeable copy (or any copy at + // all). + if (pkt->cmd == MemCmd::Writeback) { + assert(blkSize == pkt->getSize()); + if (blk == NULL) { + // need to do a replacement + blk = allocateBlock(pkt->getAddr(), writebacks); + if (blk == NULL) { + // no replaceable block available, give up. + // writeback will be forwarded to next level. + incMissCount(pkt, id); + return false; + } + int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + tags->insertBlock(pkt->getAddr(), blk, id); + blk->status = BlkValid | BlkReadable; } + std::memcpy(blk->data, pkt->getPtr(), blkSize); + blk->status |= BlkDirty; + // nothing else to do; writeback doesn't expect response + assert(!pkt->needsResponse()); + incHitCount(pkt, id); + return true; + } + + incMissCount(pkt, id); + + if (blk == NULL && pkt->isLLSC() && pkt->isWrite()) { + // complete miss on store conditional... just give up now + pkt->req->setExtraData(0); + return true; } - return satisfied; + return false; } -class ForwardResponseRecord : public Packet::SenderState +class ForwardResponseRecord : public Packet::SenderState, public FastAlloc { Packet::SenderState *prevSenderState; int prevSrc; @@ -382,10 +439,22 @@ Cache::timingAccess(PacketPtr pkt) memSidePort->sendTiming(snoopPkt); // main memory will delete snoopPkt } + // since we're the official target but we aren't responding, + // delete the packet now. + delete pkt; return true; } if (pkt->req->isUncacheable()) { + if (pkt->req->isClrex()) { + tags->clearLocks(); + } else { + BlkType *blk = tags->findBlock(pkt->getAddr()); + if (blk != NULL) { + tags->invalidateBlk(blk); + } + } + // writes go in write buffer, reads use MSHR if (pkt->isWrite() && !pkt->isRead()) { allocateWriteBuffer(pkt, time, true); @@ -397,29 +466,16 @@ Cache::timingAccess(PacketPtr pkt) } int lat = hitLatency; - bool satisfied = false; - - Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); - MSHR *mshr = mshrQueue.findMatch(blk_addr); + BlkType *blk = NULL; + PacketList writebacks; - if (!mshr) { - // no outstanding access to this block, look up in cache - // (otherwise if we allow reads while there's an outstanding - // write miss, the read could return stale data out of the - // cache block... a more aggressive system could detect the - // overlap (if any) and forward data out of the MSHRs, but we - // don't do that yet) - BlkType *blk = NULL; - satisfied = access(pkt, blk, lat); - } + bool satisfied = access(pkt, blk, lat, writebacks); #if 0 - PacketList writebacks; + /** @todo make the fast write alloc (wh64) work with coherence. */ // If this is a block size write/hint (WH64) allocate the block here // if the coherence protocol allows it. - /** @todo make the fast write alloc (wh64) work with coherence. */ - /** @todo Do we want to do fast writes for writebacks as well? */ if (!blk && pkt->getSize() >= blkSize && coherence->allowFastWrites() && (pkt->cmd == MemCmd::WriteReq || pkt->cmd == MemCmd::WriteInvalidateReq) ) { @@ -435,15 +491,11 @@ Cache::timingAccess(PacketPtr pkt) ++fastWrites; } } - - // copy writebacks to write buffer - while (!writebacks.empty()) { - PacketPtr wbPkt = writebacks.front(); - allocateWriteBuffer(wbPkt, time, true); - writebacks.pop_front(); - } #endif + // track time of availability of next prefetch, if any + Tick next_pf_time = 0; + bool needsResponse = pkt->needsResponse(); if (satisfied) { @@ -453,16 +505,23 @@ Cache::timingAccess(PacketPtr pkt) } else { delete pkt; } + + if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) { + if (blk) + blk->status &= ~BlkHWPrefetched; + next_pf_time = prefetcher->notify(pkt, time); + } } else { // miss - if (prefetchMiss) - prefetcher->handleMiss(pkt, time); + + Addr blk_addr = blockAlign(pkt->getAddr()); + MSHR *mshr = mshrQueue.findMatch(blk_addr); if (mshr) { // MSHR hit //@todo remove hw_pf here - mshr_hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - if (mshr->threadNum != 0/*pkt->req->getThreadNum()*/) { + mshr_hits[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + if (mshr->threadNum != 0/*pkt->req->threadId()*/) { mshr->threadNum = -1; } mshr->allocateTarget(pkt, time, order++); @@ -476,22 +535,57 @@ Cache::timingAccess(PacketPtr pkt) } } else { // no MSHR - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; // always mark as cache fill for now... if we implement // no-write-allocate or bypass accesses this will have to // be changed. if (pkt->cmd == MemCmd::Writeback) { allocateWriteBuffer(pkt, time, true); } else { + if (blk && blk->isValid()) { + // If we have a write miss to a valid block, we + // need to mark the block non-readable. Otherwise + // if we allow reads while there's an outstanding + // write miss, the read could return stale data + // out of the cache block... a more aggressive + // system could detect the overlap (if any) and + // forward data out of the MSHRs, but we don't do + // that yet. Note that we do need to leave the + // block valid so that it stays in the cache, in + // case we get an upgrade response (and hence no + // new data) when the write miss completes. + // As long as CPUs do proper store/load forwarding + // internally, and have a sufficiently weak memory + // model, this is probably unnecessary, but at some + // point it must have seemed like we needed it... + assert(pkt->needsExclusive() && !blk->isWritable()); + blk->status &= ~BlkReadable; + } + allocateMissBuffer(pkt, time, true); } + + if (prefetcher) { + next_pf_time = prefetcher->notify(pkt, time); + } } } + if (next_pf_time != 0) + requestMemSideBus(Request_PF, std::max(time, next_pf_time)); + + // copy writebacks to write buffer + while (!writebacks.empty()) { + PacketPtr wbPkt = writebacks.front(); + allocateWriteBuffer(wbPkt, time, true); + writebacks.pop_front(); + } + return true; } +// See comment in cache.hh. template PacketPtr Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, @@ -500,18 +594,15 @@ Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, bool blkValid = blk && blk->isValid(); if (cpu_pkt->req->isUncacheable()) { - assert(blk == NULL); + //assert(blk == NULL); return NULL; } if (!blkValid && - (cpu_pkt->cmd == MemCmd::Writeback || - cpu_pkt->cmd == MemCmd::UpgradeReq)) { - // For now, writebacks from upper-level caches that - // completely miss in the cache just go through. If we had - // "fast write" support (where we could write the whole - // block w/o fetching new data) we might want to allocate - // on writeback misses instead. + (cpu_pkt->cmd == MemCmd::Writeback || cpu_pkt->isUpgrade())) { + // Writebacks that weren't allocated in access() and upgrades + // from upper-level caches that missed completely just go + // through. return NULL; } @@ -527,7 +618,7 @@ Cache::getBusPacket(PacketPtr cpu_pkt, BlkType *blk, // only reason to be here is that blk is shared // (read-only) and we need exclusive assert(needsExclusive && !blk->isWritable()); - cmd = MemCmd::UpgradeReq; + cmd = cpu_pkt->isLLSC() ? MemCmd::SCUpgradeReq : MemCmd::UpgradeReq; } else { // block is invalid cmd = needsExclusive ? MemCmd::ReadExReq : MemCmd::ReadReq; @@ -577,47 +668,79 @@ Cache::atomicAccess(PacketPtr pkt) // access in timing mode BlkType *blk = NULL; + PacketList writebacks; - if (!access(pkt, blk, lat)) { + if (!access(pkt, blk, lat, writebacks)) { // MISS - PacketPtr busPkt = getBusPacket(pkt, blk, pkt->needsExclusive()); + PacketPtr bus_pkt = getBusPacket(pkt, blk, pkt->needsExclusive()); - bool isCacheFill = (busPkt != NULL); + bool is_forward = (bus_pkt == NULL); - if (busPkt == NULL) { + if (is_forward) { // just forwarding the same request to the next level // no local cache operation involved - busPkt = pkt; + bus_pkt = pkt; } DPRINTF(Cache, "Sending an atomic %s for %x\n", - busPkt->cmdString(), busPkt->getAddr()); + bus_pkt->cmdString(), bus_pkt->getAddr()); #if TRACING_ON CacheBlk::State old_state = blk ? blk->status : 0; #endif - lat += memSidePort->sendAtomic(busPkt); + lat += memSidePort->sendAtomic(bus_pkt); DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", - busPkt->cmdString(), busPkt->getAddr(), old_state); - - if (isCacheFill) { - PacketList writebacks; - blk = handleFill(busPkt, blk, writebacks); - satisfyCpuSideRequest(pkt, blk); - delete busPkt; - - // Handle writebacks if needed - while (!writebacks.empty()){ - PacketPtr wbPkt = writebacks.front(); - memSidePort->sendAtomic(wbPkt); - writebacks.pop_front(); - delete wbPkt; + bus_pkt->cmdString(), bus_pkt->getAddr(), old_state); + + assert(!bus_pkt->wasNacked()); + + // If packet was a forward, the response (if any) is already + // in place in the bus_pkt == pkt structure, so we don't need + // to do anything. Otherwise, use the separate bus_pkt to + // generate response to pkt and then delete it. + if (!is_forward) { + if (pkt->needsResponse()) { + assert(bus_pkt->isResponse()); + if (bus_pkt->isError()) { + pkt->makeAtomicResponse(); + pkt->copyError(bus_pkt); + } else if (bus_pkt->isRead() || + bus_pkt->cmd == MemCmd::UpgradeResp) { + // we're updating cache state to allow us to + // satisfy the upstream request from the cache + blk = handleFill(bus_pkt, blk, writebacks); + satisfyCpuSideRequest(pkt, blk); + } else { + // we're satisfying the upstream request without + // modifying cache state, e.g., a write-through + pkt->makeAtomicResponse(); + } } + delete bus_pkt; } } + // Note that we don't invoke the prefetcher at all in atomic mode. + // It's not clear how to do it properly, particularly for + // prefetchers that aggressively generate prefetch candidates and + // rely on bandwidth contention to throttle them; these will tend + // to pollute the cache in atomic mode since there is no bandwidth + // contention. If we ever do want to enable prefetching in atomic + // mode, though, this is the place to do it... see timingAccess() + // for an example (though we'd want to issue the prefetch(es) + // immediately rather than calling requestMemSideBus() as we do + // there). + + // Handle writebacks if needed + while (!writebacks.empty()){ + PacketPtr wbPkt = writebacks.front(); + memSidePort->sendAtomic(wbPkt); + writebacks.pop_front(); + delete wbPkt; + } + // We now have the block one way or another (hit or completed miss) if (pkt->needsResponse()) { @@ -631,21 +754,27 @@ Cache::atomicAccess(PacketPtr pkt) template void Cache::functionalAccess(PacketPtr pkt, + CachePort *incomingPort, CachePort *otherSidePort) { - Addr blk_addr = pkt->getAddr() & ~(blkSize - 1); + Addr blk_addr = blockAlign(pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr()); - if (blk && pkt->checkFunctional(blk_addr, blkSize, blk->data)) { - // request satisfied from block - return; - } + pkt->pushLabel(name()); - // Need to check for outstanding misses and writes; if neither one - // satisfies, then forward to other side of cache. - if (!(mshrQueue.checkFunctional(pkt, blk_addr) || - writeBuffer.checkFunctional(pkt, blk_addr))) { - otherSidePort->checkAndSendFunctional(pkt); + CacheBlkPrintWrapper cbpw(blk); + bool done = + (blk && pkt->checkFunctional(&cbpw, blk_addr, blkSize, blk->data)) + || incomingPort->checkFunctional(pkt) + || mshrQueue.checkFunctional(pkt, blk_addr) + || writeBuffer.checkFunctional(pkt, blk_addr) + || otherSidePort->checkFunctional(pkt); + + // We're leaving the cache, so pop cache->name() label + pkt->popLabel(); + + if (!done) { + otherSidePort->sendFunctional(pkt); } } @@ -663,6 +792,8 @@ Cache::handleResponse(PacketPtr pkt) { Tick time = curTick + hitLatency; MSHR *mshr = dynamic_cast(pkt->senderState); + bool is_error = pkt->isError(); + assert(mshr); if (pkt->wasNacked()) { @@ -671,7 +802,11 @@ Cache::handleResponse(PacketPtr pkt) "not implemented\n"); return; } - assert(!pkt->isError()); + if (is_error) { + DPRINTF(Cache, "Cache received packet with error for address %x, " + "cmd: %s\n", pkt->getAddr(), pkt->cmdString()); + } + DPRINTF(Cache, "Handling response to %x\n", pkt->getAddr()); MSHRQueue *mq = mshr->queue; @@ -691,14 +826,17 @@ Cache::handleResponse(PacketPtr pkt) PacketList writebacks; if (pkt->req->isUncacheable()) { - mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + mshr_uncacheable_lat[stats_cmd_idx][0/*pkt->req->threadId()*/] += miss_latency; } else { - mshr_miss_latency[stats_cmd_idx][0/*pkt->req->getThreadNum()*/] += + mshr_miss_latency[stats_cmd_idx][0/*pkt->req->threadId()*/] += miss_latency; } - if (mshr->isCacheFill) { + bool is_fill = !mshr->isForward && + (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp); + + if (is_fill && !is_error) { DPRINTF(Cache, "Block for addr %x being updated in Cache\n", pkt->getAddr()); @@ -719,10 +857,12 @@ Cache::handleResponse(PacketPtr pkt) while (mshr->hasTargets()) { MSHR::Target *target = mshr->getTarget(); - if (target->isCpuSide()) { + switch (target->source) { + case MSHR::Target::FromCPU: Tick completion_time; - if (blk != NULL) { - satisfyCpuSideRequest(target->pkt, blk); + if (is_fill) { + satisfyCpuSideRequest(target->pkt, blk, + true, mshr->hasPostDowngrade()); // How many bytes past the first request is this one int transfer_offset = target->pkt->getOffset(blkSize) - initial_offset; @@ -732,30 +872,76 @@ Cache::handleResponse(PacketPtr pkt) // If critical word (no offset) return first word time completion_time = tags->getHitLatency() + - transfer_offset ? pkt->finishTime : pkt->firstWordTime; + (transfer_offset ? pkt->finishTime : pkt->firstWordTime); assert(!target->pkt->req->isUncacheable()); - missLatency[target->pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/] += + missLatency[target->pkt->cmdToIndex()][0/*pkt->req->threadId()*/] += completion_time - target->recvTime; + } else if (target->pkt->cmd == MemCmd::StoreCondReq && + pkt->cmd == MemCmd::UpgradeFailResp) { + // failed StoreCond upgrade + completion_time = tags->getHitLatency() + pkt->finishTime; + target->pkt->req->setExtraData(0); } else { // not a cache fill, just forwarding response completion_time = tags->getHitLatency() + pkt->finishTime; - if (pkt->isRead()) { + if (pkt->isRead() && !is_error) { target->pkt->setData(pkt->getPtr()); } } target->pkt->makeTimingResponse(); + // if this packet is an error copy that to the new packet + if (is_error) + target->pkt->copyError(pkt); + if (target->pkt->cmd == MemCmd::ReadResp && + (pkt->isInvalidate() || mshr->hasPostInvalidate())) { + // If intermediate cache got ReadRespWithInvalidate, + // propagate that. Response should not have + // isInvalidate() set otherwise. + target->pkt->cmd = MemCmd::ReadRespWithInvalidate; + } cpuSidePort->respond(target->pkt, completion_time); - } else { + break; + + case MSHR::Target::FromPrefetcher: + assert(target->pkt->cmd == MemCmd::HardPFReq); + if (blk) + blk->status |= BlkHWPrefetched; + delete target->pkt->req; + delete target->pkt; + break; + + case MSHR::Target::FromSnoop: + // I don't believe that a snoop can be in an error state + assert(!is_error); // response to snoop request DPRINTF(Cache, "processing deferred snoop...\n"); - handleSnoop(target->pkt, blk, true, true); + assert(!(pkt->isInvalidate() && !mshr->hasPostInvalidate())); + handleSnoop(target->pkt, blk, true, true, + mshr->hasPostInvalidate()); + break; + + default: + panic("Illegal target->source enum %d\n", target->source); } mshr->popTarget(); } + if (blk) { + if (pkt->isInvalidate() || mshr->hasPostInvalidate()) { + tags->invalidateBlk(blk); + } else if (mshr->hasPostDowngrade()) { + blk->status &= ~BlkWritable; + } + } + if (mshr->promoteDeferredTargets()) { + // avoid later read getting stale data while write miss is + // outstanding.. see comment in timingAccess() + if (blk) { + blk->status &= ~BlkReadable; + } MSHRQueue *mq = mshr->queue; mq->markPending(mshr); requestMemSideBus((RequestCause)mq->index, pkt->finishTime); @@ -792,7 +978,7 @@ Cache::writebackBlk(BlkType *blk) { assert(blk && blk->isValid() && blk->isDirty()); - writebacks[0/*pkt->req->getThreadNum()*/]++; + writebacks[0/*pkt->req->threadId()*/]++; Request *writebackReq = new Request(tags->regenerateBlkAddr(blk->tag, blk->set), blkSize, 0); @@ -805,6 +991,39 @@ Cache::writebackBlk(BlkType *blk) } +template +typename Cache::BlkType* +Cache::allocateBlock(Addr addr, PacketList &writebacks) +{ + BlkType *blk = tags->findVictim(addr, writebacks); + + if (blk->isValid()) { + Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set); + MSHR *repl_mshr = mshrQueue.findMatch(repl_addr); + if (repl_mshr) { + // must be an outstanding upgrade request on block + // we're about to replace... + assert(!blk->isWritable()); + assert(repl_mshr->needsExclusive()); + // too hard to replace block with transient state + // allocation failed, block not inserted + return NULL; + } else { + DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", + repl_addr, addr, + blk->isDirty() ? "writeback" : "clean"); + + if (blk->isDirty()) { + // Save writeback packet for handling by caller + writebacks.push_back(writebackBlk(blk)); + } + } + } + + return blk; +} + + // Note that the reason we return a list of writebacks rather than // inserting them directly in the write buffer is that this function // is called by both atomic and timing-mode accesses, and in atomic @@ -822,49 +1041,46 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, if (blk == NULL) { // better have read new data... - assert(pkt->isRead()); - + assert(pkt->hasData()); // need to do a replacement - blk = tags->findReplacement(addr, writebacks); - if (blk->isValid()) { - Addr repl_addr = tags->regenerateBlkAddr(blk->tag, blk->set); - MSHR *repl_mshr = mshrQueue.findMatch(repl_addr); - if (repl_mshr) { - // must be an outstanding upgrade request on block - // we're about to replace... - assert(!blk->isWritable()); - assert(repl_mshr->needsExclusive()); - // too hard to replace block with transient state; - // just use temporary storage to complete the current - // request and then get rid of it - assert(!tempBlock->isValid()); - blk = tempBlock; - tempBlock->set = tags->extractSet(addr); - DPRINTF(Cache, "using temp block for %x\n", addr); - } else { - DPRINTF(Cache, "replacement: replacing %x with %x: %s\n", - repl_addr, addr, - blk->isDirty() ? "writeback" : "clean"); - - if (blk->isDirty()) { - // Save writeback packet for handling by caller - writebacks.push_back(writebackBlk(blk)); - } - } + blk = allocateBlock(addr, writebacks); + if (blk == NULL) { + // No replaceable block... just use temporary storage to + // complete the current request and then get rid of it + assert(!tempBlock->isValid()); + blk = tempBlock; + tempBlock->set = tags->extractSet(addr); + tempBlock->tag = tags->extractTag(addr); + DPRINTF(Cache, "using temp block for %x\n", addr); + } else { + int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; + tags->insertBlock(pkt->getAddr(), blk, id); } - blk->tag = tags->extractTag(addr); + // starting from scratch with a new block + blk->status = 0; } else { // existing block... probably an upgrade assert(blk->tag == tags->extractTag(addr)); // either we're getting new data or the block should already be valid - assert(pkt->isRead() || blk->isValid()); + assert(pkt->hasData() || blk->isValid()); + // don't clear block status... if block is already dirty we + // don't want to lose that } - if (pkt->needsExclusive() || !pkt->sharedAsserted()) { - blk->status = BlkValid | BlkWritable; - } else { - blk->status = BlkValid; + blk->status |= BlkValid | BlkReadable; + + if (!pkt->sharedAsserted()) { + blk->status |= BlkWritable; + // If we got this via cache-to-cache transfer (i.e., from a + // cache that was an owner) and took away that owner's copy, + // then we need to write it back. Normally this happens + // anyway as a side effect of getting a copy to write it, but + // there are cases (such as failed store conditionals or + // compare-and-swaps) where we'll demand an exclusive copy but + // end up not writing it. + if (pkt->memInhibitAsserted()) + blk->status |= BlkDirty; } DPRINTF(Cache, "Block addr %x moving from state %i to %i\n", @@ -889,64 +1105,81 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, template void -Cache::doTimingSupplyResponse(PacketPtr req_pkt, - uint8_t *blk_data, - bool already_copied) +Cache:: +doTimingSupplyResponse(PacketPtr req_pkt, uint8_t *blk_data, + bool already_copied, bool pending_inval) { // timing-mode snoop responses require a new packet, unless we // already made a copy... - PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt, true); - if (!req_pkt->isInvalidate()) { - // note that we're ignoring the shared flag on req_pkt... it's - // basically irrelveant, as we'll always assert shared unless - // it's an exclusive request, in which case the shared line - // should never be asserted1 - pkt->assertShared(); - } + PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt); + assert(req_pkt->isInvalidate() || pkt->sharedAsserted()); pkt->allocate(); pkt->makeTimingResponse(); if (pkt->isRead()) { pkt->setDataFromBlock(blk_data, blkSize); } + if (pkt->cmd == MemCmd::ReadResp && pending_inval) { + // Assume we defer a response to a read from a far-away cache + // A, then later defer a ReadExcl from a cache B on the same + // bus as us. We'll assert MemInhibit in both cases, but in + // the latter case MemInhibit will keep the invalidation from + // reaching cache A. This special response tells cache A that + // it gets the block to satisfy its read, but must immediately + // invalidate it. + pkt->cmd = MemCmd::ReadRespWithInvalidate; + } memSidePort->respond(pkt, curTick + hitLatency); } template void Cache::handleSnoop(PacketPtr pkt, BlkType *blk, - bool is_timing, bool is_deferred) + bool is_timing, bool is_deferred, + bool pending_inval) { + // deferred snoops can only happen in timing mode + assert(!(is_deferred && !is_timing)); + // pending_inval only makes sense on deferred snoops + assert(!(pending_inval && !is_deferred)); assert(pkt->isRequest()); - // first propagate snoop upward to see if anyone above us wants to - // handle it. save & restore packet src since it will get - // rewritten to be relative to cpu-side bus (if any) - bool alreadyResponded = pkt->memInhibitAsserted(); - if (is_timing) { - Packet *snoopPkt = new Packet(pkt, true); // clear flags - snoopPkt->setExpressSnoop(); - snoopPkt->senderState = new ForwardResponseRecord(pkt, this); - cpuSidePort->sendTiming(snoopPkt); - if (snoopPkt->memInhibitAsserted()) { - // cache-to-cache response from some upper cache - assert(!alreadyResponded); - pkt->assertMemInhibit(); + // the packet may get modified if we or a forwarded snooper + // responds in atomic mode, so remember a few things about the + // original packet up front + bool invalidate = pkt->isInvalidate(); + bool M5_VAR_USED needs_exclusive = pkt->needsExclusive(); + + if (forwardSnoops) { + // first propagate snoop upward to see if anyone above us wants to + // handle it. save & restore packet src since it will get + // rewritten to be relative to cpu-side bus (if any) + bool alreadyResponded = pkt->memInhibitAsserted(); + if (is_timing) { + Packet *snoopPkt = new Packet(pkt, true); // clear flags + snoopPkt->setExpressSnoop(); + snoopPkt->senderState = new ForwardResponseRecord(pkt, this); + cpuSidePort->sendTiming(snoopPkt); + if (snoopPkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache + assert(!alreadyResponded); + pkt->assertMemInhibit(); + } else { + delete snoopPkt->senderState; + } + if (snoopPkt->sharedAsserted()) { + pkt->assertShared(); + } + delete snoopPkt; } else { - delete snoopPkt->senderState; - } - if (snoopPkt->sharedAsserted()) { - pkt->assertShared(); - } - delete snoopPkt; - } else { - int origSrc = pkt->getSrc(); - cpuSidePort->sendAtomic(pkt); - if (!alreadyResponded && pkt->memInhibitAsserted()) { - // cache-to-cache response from some upper cache: - // forward response to original requester - assert(pkt->isResponse()); + int origSrc = pkt->getSrc(); + cpuSidePort->sendAtomic(pkt); + if (!alreadyResponded && pkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache: + // forward response to original requester + assert(pkt->isResponse()); + } + pkt->setSrc(origSrc); } - pkt->setSrc(origSrc); } if (!blk || !blk->isValid()) { @@ -958,10 +1191,9 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // and then do it later bool respond = blk->isDirty() && pkt->needsResponse(); bool have_exclusive = blk->isWritable(); - bool invalidate = pkt->isInvalidate(); - if (pkt->isRead() && !pkt->isInvalidate()) { - assert(!pkt->needsExclusive()); + if (pkt->isRead() && !invalidate) { + assert(!needs_exclusive); pkt->assertShared(); int bits_to_clear = BlkWritable; const bool haveOwnershipState = true; // for now @@ -974,6 +1206,10 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, blk->status &= ~bits_to_clear; } + DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n", + pkt->cmdString(), blockAlign(pkt->getAddr()), + respond ? "responding, " : "", invalidate ? 0 : blk->status); + if (respond) { assert(!pkt->memInhibitAsserted()); pkt->assertMemInhibit(); @@ -981,11 +1217,16 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, pkt->setSupplyExclusive(); } if (is_timing) { - doTimingSupplyResponse(pkt, blk->data, is_deferred); + doTimingSupplyResponse(pkt, blk->data, is_deferred, pending_inval); } else { pkt->makeAtomicResponse(); pkt->setDataFromBlock(blk->data, blkSize); } + } else if (is_timing && is_deferred) { + // if it's a deferred timing snoop then we've made a copy of + // the packet, and so if we're not using that copy to respond + // then we need to delete it here. + delete pkt; } // Do this last in case it deallocates block data or something @@ -993,10 +1234,6 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, if (invalidate) { tags->invalidateBlk(blk); } - - DPRINTF(Cache, "snooped a %s request for addr %x, %snew state is %i\n", - pkt->cmdString(), blockAlign(pkt->getAddr()), - respond ? "responding, " : "", blk->status); } @@ -1015,7 +1252,7 @@ Cache::snoopTiming(PacketPtr pkt) BlkType *blk = tags->findBlock(pkt->getAddr()); - Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); + Addr blk_addr = blockAlign(pkt->getAddr()); MSHR *mshr = mshrQueue.findMatch(blk_addr); // Let the MSHR itself track the snoop and decide whether we want @@ -1035,7 +1272,7 @@ Cache::snoopTiming(PacketPtr pkt) pkt->getAddr()); //Look through writebacks for any non-uncachable writes, use that - for (int i=0; iisUncacheable()); assert(mshr->getNumTargets() == 1); @@ -1052,11 +1289,13 @@ Cache::snoopTiming(PacketPtr pkt) // the packet's invalidate flag is set... assert(pkt->isInvalidate()); } - doTimingSupplyResponse(pkt, wb_pkt->getPtr(), false); + doTimingSupplyResponse(pkt, wb_pkt->getPtr(), + false, false); if (pkt->isInvalidate()) { // Invalidation trumps our writeback... discard here markInService(mshr); + delete wb_pkt; } // If this was a shared writeback, there may still be @@ -1068,7 +1307,7 @@ Cache::snoopTiming(PacketPtr pkt) } } - handleSnoop(pkt, blk, true, false); + handleSnoop(pkt, blk, true, false, false); } @@ -1083,7 +1322,7 @@ Cache::snoopAtomic(PacketPtr pkt) } BlkType *blk = tags->findBlock(pkt->getAddr()); - handleSnoop(pkt, blk, false, false); + handleSnoop(pkt, blk, false, false, false); return hitLatency; } @@ -1148,15 +1387,18 @@ Cache::getNextMSHR() // fall through... no pending requests. Try a prefetch. assert(!miss_mshr && !write_mshr); - if (!mshrQueue.isFull()) { + if (prefetcher && !mshrQueue.isFull()) { // If we have a miss queue slot, we can try a prefetch PacketPtr pkt = prefetcher->getPacket(); if (pkt) { - // Update statistic on number of prefetches issued - // (hwpf_mshr_misses) - mshr_misses[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; - // Don't request bus, since we already have it - return allocateMissBuffer(pkt, curTick, false); + Addr pf_addr = blockAlign(pkt->getAddr()); + if (!tags->findBlock(pf_addr) && !mshrQueue.findMatch(pf_addr)) { + // Update statistic on number of prefetches issued + // (hwpf_mshr_misses) + mshr_misses[pkt->cmdToIndex()][0/*pkt->req->threadId()*/]++; + // Don't request bus, since we already have it + return allocateMissBuffer(pkt, curTick, false); + } } } @@ -1178,7 +1420,17 @@ Cache::getTimingPacket() PacketPtr tgt_pkt = mshr->getTarget()->pkt; PacketPtr pkt = NULL; - if (mshr->isSimpleForward()) { + if (tgt_pkt->cmd == MemCmd::SCUpgradeFailReq) { + // SCUpgradeReq saw invalidation while queued in MSHR, so now + // that we are getting around to processing it, just treat it + // as if we got a failure response + pkt = new Packet(tgt_pkt); + pkt->cmd = MemCmd::UpgradeFailResp; + pkt->senderState = mshr; + pkt->firstWordTime = pkt->finishTime = curTick; + handleResponse(pkt); + return NULL; + } else if (mshr->isForwardNoResponse()) { // no response expected, just forward packet as it is assert(tags->findBlock(mshr->addr) == NULL); pkt = tgt_pkt; @@ -1186,11 +1438,10 @@ Cache::getTimingPacket() BlkType *blk = tags->findBlock(mshr->addr); pkt = getBusPacket(tgt_pkt, blk, mshr->needsExclusive()); - mshr->isCacheFill = (pkt != NULL); + mshr->isForward = (pkt == NULL); - if (pkt == NULL) { + if (mshr->isForward) { // not a cache block request, but a response is expected - assert(!mshr->isSimpleForward()); // make copy of current packet to forward, keep current // copy for response handling pkt = new Packet(tgt_pkt); @@ -1207,6 +1458,22 @@ Cache::getTimingPacket() } +template +Tick +Cache::nextMSHRReadyTime() +{ + Tick nextReady = std::min(mshrQueue.nextMSHRReadyTime(), + writeBuffer.nextMSHRReadyTime()); + + if (prefetcher) { + nextReady = std::min(nextReady, + prefetcher->nextPrefetchReadyTime()); + } + + return nextReady; +} + + /////////////// // // CpuSidePort @@ -1218,10 +1485,10 @@ void Cache::CpuSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { - // CPU side port doesn't snoop; it's a target only. - bool dummy; - otherPort->getPeerAddressRanges(resp, dummy); + // CPU side port doesn't snoop; it's a target only. It can + // potentially respond to any address. snoop = false; + resp.push_back(myCache()->getAddrRange()); } @@ -1253,17 +1520,15 @@ template void Cache::CpuSidePort::recvFunctional(PacketPtr pkt) { - checkFunctional(pkt); - if (!pkt->isResponse()) - myCache()->functionalAccess(pkt, cache->memSidePort); + myCache()->functionalAccess(pkt, this, otherPort); } template Cache:: -CpuSidePort::CpuSidePort(const std::string &_name, - Cache *_cache) - : BaseCache::CachePort(_name, _cache) +CpuSidePort::CpuSidePort(const std::string &_name, Cache *_cache, + const std::string &_label) + : BaseCache::CachePort(_name, _cache, _label) { } @@ -1278,9 +1543,9 @@ void Cache::MemSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { - otherPort->getPeerAddressRanges(resp, snoop); - // Memory-side port always snoops, so unconditionally set flag for - // caller. + // Memory-side port always snoops, but never passes requests + // through to targets on the cpu side (so we don't add anything to + // the address range list). snoop = true; } @@ -1327,9 +1592,7 @@ template void Cache::MemSidePort::recvFunctional(PacketPtr pkt) { - checkFunctional(pkt); - if (!pkt->isResponse()) - myCache()->functionalAccess(pkt, cache->cpuSidePort); + myCache()->functionalAccess(pkt, this, otherPort); } @@ -1360,18 +1623,15 @@ Cache::MemSidePort::sendPacket() MSHR *mshr = dynamic_cast(pkt->senderState); bool success = sendTiming(pkt); - DPRINTF(CachePort, - "Address %x was %s in sending the timing request\n", - pkt->getAddr(), success ? "successful" : "unsuccessful"); waitingOnRetry = !success; if (waitingOnRetry) { DPRINTF(CachePort, "now waiting on a retry\n"); - if (!mshr->isSimpleForward()) { + if (!mshr->isForwardNoResponse()) { delete pkt; } } else { - myCache()->markInService(mshr); + myCache()->markInService(mshr, pkt); } } } @@ -1385,10 +1645,10 @@ Cache::MemSidePort::sendPacket() // @TODO: need to facotr in prefetch requests here somehow if (nextReady != MaxTick) { DPRINTF(CachePort, "more packets to send @ %d\n", nextReady); - sendEvent->schedule(std::max(nextReady, curTick + 1)); + schedule(sendEvent, std::max(nextReady, curTick + 1)); } else { // no more to send right now: if we're draining, we may be done - if (drainEvent) { + if (drainEvent && !sendEvent->scheduled()) { drainEvent->process(); drainEvent = NULL; } @@ -1416,8 +1676,9 @@ Cache::MemSidePort::processSendEvent() template Cache:: -MemSidePort::MemSidePort(const std::string &_name, Cache *_cache) - : BaseCache::CachePort(_name, _cache) +MemSidePort::MemSidePort(const std::string &_name, Cache *_cache, + const std::string &_label) + : BaseCache::CachePort(_name, _cache, _label) { // override default send event from SimpleTimingPort delete sendEvent;