memSidePort->schedTimingSnoopResp(pkt, forward_time, true);
}
-void
+uint32_t
Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing,
bool is_deferred, bool pending_inval)
{
bool invalidate = pkt->isInvalidate();
bool M5_VAR_USED needs_exclusive = pkt->needsExclusive();
+ uint32_t snoop_delay = 0;
+
if (forwardSnoops) {
// first propagate snoop upward to see if anyone above us wants to
// handle it. save & restore packet src since it will get
// time
snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
cpuSidePort->sendTimingSnoopReq(&snoopPkt);
+
+ // add the header delay (including crossbar and snoop
+ // delays) of the upward snoop to the snoop delay for this
+ // cache
+ snoop_delay += snoopPkt.headerDelay;
+
if (snoopPkt.memInhibitAsserted()) {
// cache-to-cache response from some upper cache
assert(!alreadyResponded);
if (!blk || !blk->isValid()) {
DPRINTF(Cache, "%s snoop miss for %s addr %#llx size %d\n",
__func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize());
- return;
+ return snoop_delay;
} else {
DPRINTF(Cache, "%s snoop hit for %s for addr %#llx size %d, "
"old state is %s\n", __func__, pkt->cmdString(),
DPRINTF(Cache, "Found addr %#llx in upper level cache for snoop %s from"
" lower cache\n", pkt->getAddr(), pkt->cmdString());
pkt->setBlockCached();
- return;
+ return snoop_delay;
}
if (!pkt->req->isUncacheable() && pkt->isRead() && !invalidate) {
}
DPRINTF(Cache, "new state is %s\n", blk->print());
+
+ return snoop_delay;
}
Addr blk_addr = blockAlign(pkt->getAddr());
MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
+ // Update the latency cost of the snoop so that the crossbar can
+ // account for it. Do not overwrite what other neighbouring caches
+ // have already done, rather take the maximum. The update is
+ // tentative, for cases where we return before an upward snoop
+ // happens below.
+ pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay,
+ lookupLatency * clockPeriod());
+
// Inform request(Prefetch, CleanEvict or Writeback) from below of
// MSHR hit, set setBlockCached.
if (mshr && pkt->mustCheckAbove()) {
// We could be more selective and return here if the
// request is non-exclusive or if the writeback is
// exclusive.
- handleSnoop(pkt, blk, true, false, false);
+ uint32_t snoop_delay = handleSnoop(pkt, blk, true, false, false);
+
+ // Override what we did when we first saw the snoop, as we now
+ // also have the cost of the upwards snoops to account for
+ pkt->snoopDelay = std::max<uint32_t>(pkt->snoopDelay, snoop_delay +
+ lookupLatency * clockPeriod());
}
bool
}
CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
- handleSnoop(pkt, blk, false, false, false);
- // We consider forwardLatency here because a snoop occurs in atomic mode
- return forwardLatency * clockPeriod();
+ uint32_t snoop_delay = handleSnoop(pkt, blk, false, false, false);
+ return snoop_delay + lookupLatency * clockPeriod();
}
bool already_copied, bool pending_inval);
/**
- * Sets the blk to the new state.
- * @param blk The cache block being snooped.
- * @param new_state The new coherence state for the block.
+ * Perform an upward snoop if needed, and update the block state
+ * (possibly invalidating the block). Also create a response if required.
+ *
+ * @param pkt Snoop packet
+ * @param blk Cache block being snooped
+ * @param is_timing Timing or atomic for the response
+ * @param is_deferred Is this a deferred snoop or not?
+ * @param pending_inval Do we have a pending invalidation?
+ *
+ * @return The snoop delay incurred by the upwards snoop
*/
- void handleSnoop(PacketPtr ptk, CacheBlk *blk,
- bool is_timing, bool is_deferred, bool pending_inval);
+ uint32_t handleSnoop(PacketPtr pkt, CacheBlk *blk,
+ bool is_timing, bool is_deferred, bool pending_inval);
/**
* Create a writeback request for the given block.
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
if (!system->bypassCaches()) {
+ assert(pkt->snoopDelay == 0);
+
// the packet is a memory-mapped request and should be
// broadcast to our snoopers but the source
if (snoopFilter) {
} else {
forwardTiming(pkt, slave_port_id);
}
+
+ // add the snoop delay to our header delay, and then reset it
+ pkt->headerDelay += pkt->snoopDelay;
+ pkt->snoopDelay = 0;
}
// forwardTiming snooped into peer caches of the sender, and if
// we should only see express snoops from caches
assert(pkt->isExpressSnoop());
+ // set the packet header and payload delay, for now use forward latency
+ // @todo Assess the choice of latency further
+ calcPacketTiming(pkt, forwardLatency * clockPeriod());
+
// remember if the packet is inhibited so we can see if it changes
const bool is_inhibited = pkt->memInhibitAsserted();
+ assert(pkt->snoopDelay == 0);
+
if (snoopFilter) {
// let the Snoop Filter work its magic and guide probing
auto sf_res = snoopFilter->lookupSnoop(pkt);
forwardTiming(pkt, InvalidPortID);
}
+ // add the snoop delay to our header delay, and then reset it
+ pkt->headerDelay += pkt->snoopDelay;
+ pkt->snoopDelay = 0;
+
// if we can expect a response, remember how to route it
if (!is_inhibited && pkt->memInhibitAsserted()) {
assert(routeTo.find(pkt->req) == routeTo.end());
*/
uint32_t headerDelay;
+ /**
+ * Keep track of the extra delay incurred by snooping upwards
+ * before sending a request down the memory system. This is used
+ * by the coherent crossbar to account for the additional request
+ * delay.
+ */
+ uint32_t snoopDelay;
+
/**
* The extra pipelining delay from seeing the packet until the end of
* payload is transmitted by the component that provided it (if
*/
Packet(const RequestPtr _req, MemCmd _cmd)
: cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
- size(0), headerDelay(0), payloadDelay(0),
+ size(0), headerDelay(0), snoopDelay(0), payloadDelay(0),
senderState(NULL)
{
if (req->hasPaddr()) {
*/
Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize)
: cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
- headerDelay(0), payloadDelay(0),
+ headerDelay(0), snoopDelay(0), payloadDelay(0),
senderState(NULL)
{
if (req->hasPaddr()) {
addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
bytesValid(pkt->bytesValid),
headerDelay(pkt->headerDelay),
+ snoopDelay(0),
payloadDelay(pkt->payloadDelay),
senderState(pkt->senderState)
{