From e57d8f2d897bc26aade774e090842367e38e974b Mon Sep 17 00:00:00 2001 From: Nikos Nikoleris Date: Mon, 10 Oct 2016 14:40:10 +0100 Subject: [PATCH] mem: Restructure whole-line writes to simplify write merging This patch changes how we deal with whole-line writes their responses. With these changes, we use the MSHR tracking to determine if a whole-line is written, and on a fill we simply handle the invalidation response, with the actual writes taking place as part of satisfying the CPU-side hit. Change-Id: I9a18e41a95db3c20b97f8bca7d95ff33d35a578b Reviewed-on: https://gem5-review.googlesource.com/c/12905 Reviewed-by: Andreas Sandberg Reviewed-by: Daniel Carvalho Maintainer: Nikos Nikoleris --- src/mem/cache/base.cc | 21 +++++++++++---------- src/mem/cache/base.hh | 5 ++++- src/mem/cache/cache.cc | 26 +++++++++++--------------- src/mem/cache/cache.hh | 3 ++- src/mem/cache/noncoherent_cache.cc | 6 ++++-- src/mem/cache/noncoherent_cache.hh | 3 ++- src/mem/packet.hh | 6 ++++++ 7 files changed, 40 insertions(+), 30 deletions(-) diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 0eeb19252..b292e5a25 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -474,7 +474,12 @@ BaseCache::recvTimingResp(PacketPtr pkt) PacketList writebacks; bool is_fill = !mshr->isForward && - (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp); + (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp || + mshr->wasWholeLineWrite); + + // make sure that if the mshr was due to a whole line write then + // the response is an invalidation + assert(!mshr->wasWholeLineWrite || pkt->isInvalidate()); CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); @@ -1121,7 +1126,7 @@ CacheBlk* BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks, bool allocate) { - assert(pkt->isResponse() || pkt->cmd == MemCmd::WriteLineReq); + assert(pkt->isResponse()); Addr addr = pkt->getAddr(); bool is_secure = pkt->isSecure(); #if TRACING_ON @@ -1134,12 +1139,7 @@ BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks, if (!blk) { // better have read new data... - assert(pkt->hasData()); - - // only read responses and write-line requests have data; - // note that we don't write the data here for write-line - that - // happens in the subsequent call to satisfyRequest - assert(pkt->isRead() || pkt->cmd == MemCmd::WriteLineReq); + assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp); // need to do a replacement if allocating, otherwise we stick // with the temporary storage @@ -1173,7 +1173,7 @@ BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks, // sanity check for whole-line writes, which should always be // marked as writable as part of the fill, and then later marked // dirty as part of satisfyRequest - if (pkt->cmd == MemCmd::WriteLineReq) { + if (pkt->cmd == MemCmd::InvalidateResp) { assert(!pkt->hasSharers()); } @@ -1465,7 +1465,8 @@ BaseCache::sendMSHRQueuePacket(MSHR* mshr) // either a prefetch that is not present upstream, or a normal // MSHR request, proceed to get the packet to send downstream - PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable()); + PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(), + mshr->isWholeLineWrite()); mshr->isForward = (pkt == nullptr); diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 92748a38b..47218f828 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -566,10 +566,13 @@ class BaseCache : public MemObject * @param blk The referenced block, can be nullptr. * @param needs_writable Indicates that the block must be writable * even if the request in cpu_pkt doesn't indicate that. + * @param is_whole_line_write True if there are writes for the + * whole line * @return A packet send to the memory below */ virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, - bool needs_writable) const = 0; + bool needs_writable, + bool is_whole_line_write) const = 0; /** * Determine if clean lines should be written back or not. In diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index 116b543cc..d38680eb8 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -477,7 +477,8 @@ Cache::recvTimingReq(PacketPtr pkt) PacketPtr Cache::createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, - bool needsWritable) const + bool needsWritable, + bool is_whole_line_write) const { // should never see evictions here assert(!cpu_pkt->isEviction()); @@ -500,7 +501,8 @@ Cache::createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, // write miss on a shared owned block will generate a ReadExcl, // which will clobber the owned copy. const bool useUpgrades = true; - if (cpu_pkt->cmd == MemCmd::WriteLineReq) { + assert(cpu_pkt->cmd != MemCmd::WriteLineReq || is_whole_line_write); + if (is_whole_line_write) { assert(!blkValid || !blk->isWritable()); // forward as invalidate to all other caches, this gives us // the line in Exclusive state, and invalidates all other @@ -580,7 +582,8 @@ Cache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk, // only misses left - PacketPtr bus_pkt = createMissPacket(pkt, blk, pkt->needsWritable()); + PacketPtr bus_pkt = createMissPacket(pkt, blk, pkt->needsWritable(), + pkt->isWholeLineWrite(blkSize)); bool is_forward = (bus_pkt == nullptr); @@ -615,12 +618,12 @@ Cache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk, if (bus_pkt->isError()) { pkt->makeAtomicResponse(); pkt->copyError(bus_pkt); - } else if (pkt->cmd == MemCmd::WriteLineReq) { + } else if (pkt->isWholeLineWrite(blkSize)) { // note the use of pkt, not bus_pkt here. // write-line request to the cache that promoted // the write to a whole line - blk = handleFill(pkt, blk, writebacks, + blk = handleFill(bus_pkt, blk, writebacks, allocOnFill(pkt->cmd)); assert(blk != NULL); is_invalidate = false; @@ -676,7 +679,8 @@ Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk, const bool is_error = pkt->isError(); // allow invalidation responses originating from write-line // requests to be discarded - bool is_invalidate = pkt->isInvalidate(); + bool is_invalidate = pkt->isInvalidate() && + !mshr->wasWholeLineWrite; MSHR::TargetList targets = mshr->extractServiceableTargets(pkt); for (auto &target: targets) { @@ -706,16 +710,8 @@ Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk, // from above. if (tgt_pkt->cmd == MemCmd::WriteLineReq) { assert(!is_error); - // we got the block in a writable state, so promote - // any deferred targets if possible - mshr->promoteWritable(); - // NB: we use the original packet here and not the response! - blk = handleFill(tgt_pkt, blk, writebacks, - targets.allocOnFill); assert(blk); - - // discard the invalidation response - is_invalidate = false; + assert(blk->isWritable()); } if (blk && blk->isValid() && !mshr->isForward) { diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index f8eccfee6..588e7b94e 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -152,7 +152,8 @@ class Cache : public BaseCache PacketPtr cleanEvictBlk(CacheBlk *blk); PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, - bool needsWritable) const override; + bool needs_writable, + bool is_whole_line_write) const override; /** * Send up a snoop request and find cached copies. If cached copies are diff --git a/src/mem/cache/noncoherent_cache.cc b/src/mem/cache/noncoherent_cache.cc index b4ffed786..726c32f1c 100644 --- a/src/mem/cache/noncoherent_cache.cc +++ b/src/mem/cache/noncoherent_cache.cc @@ -148,7 +148,8 @@ NoncoherentCache::recvTimingReq(PacketPtr pkt) PacketPtr NoncoherentCache::createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, - bool needs_writable) const + bool needs_writable, + bool is_whole_line_write) const { // We also fill for writebacks from the coherent caches above us, // and they do not need responses @@ -173,7 +174,8 @@ Cycles NoncoherentCache::handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk, PacketList &writebacks) { - PacketPtr bus_pkt = createMissPacket(pkt, blk, true); + PacketPtr bus_pkt = createMissPacket(pkt, blk, true, + pkt->isWholeLineWrite(blkSize)); DPRINTF(Cache, "Sending an atomic %s\n", bus_pkt->print()); Cycles latency = ticksToCycles(memSidePort.sendAtomic(bus_pkt)); diff --git a/src/mem/cache/noncoherent_cache.hh b/src/mem/cache/noncoherent_cache.hh index 2a60f4c5e..fea3cd980 100644 --- a/src/mem/cache/noncoherent_cache.hh +++ b/src/mem/cache/noncoherent_cache.hh @@ -120,7 +120,8 @@ class NoncoherentCache : public BaseCache * needs_writeble parameter is ignored. */ PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk, - bool needs_writable) const override; + bool needs_writable, + bool is_whole_line_write) const override; M5_NODISCARD PacketPtr evictBlock(CacheBlk *blk) override; diff --git a/src/mem/packet.hh b/src/mem/packet.hh index e378de8f0..f0b7c2f2f 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -553,6 +553,12 @@ class Packet : public Printable bool isPrint() const { return cmd.isPrint(); } bool isFlush() const { return cmd.isFlush(); } + bool isWholeLineWrite(unsigned blk_size) + { + return (cmd == MemCmd::WriteReq || cmd == MemCmd::WriteLineReq) && + getOffset(blk_size) == 0 && getSize() == blk_size; + } + //@{ /// Snoop flags /** -- 2.30.2