X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmem%2Fcoherent_xbar.cc;h=713c6421b011c996ffd132665bb0a96d63062995;hb=44e3c95555b380f62c3fa4d878d78f26ad035475;hp=667ff96f9b4ebc55bf63377de505eb32cdd9d72c;hpb=f26a28929583f2ed7fb55521e49c3f9bef557c05;p=gem5.git diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 667ff96f9..713c6421b 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2011-2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,6 +40,7 @@ * Authors: Ali Saidi * Andreas Hansson * William Wang + * Nikos Nikoleris */ /** @@ -47,15 +48,25 @@ * Definition of a crossbar object. */ -#include "base/misc.hh" +#include "mem/coherent_xbar.hh" + +#include "base/logging.hh" #include "base/trace.hh" #include "debug/AddrRanges.hh" #include "debug/CoherentXBar.hh" -#include "mem/coherent_xbar.hh" #include "sim/system.hh" CoherentXBar::CoherentXBar(const CoherentXBarParams *p) - : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter) + : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter), + snoopResponseLatency(p->snoop_response_latency), + maxOutstandingSnoopCheck(p->max_outstanding_snoops), + maxRoutingTableSizeCheck(p->max_routing_table_size), + pointOfCoherency(p->point_of_coherency), + pointOfUnification(p->point_of_unification), + + snoops(this, "snoops", "Total snoops (count)"), + snoopTraffic(this, "snoopTraffic", "Total snoop traffic (bytes)"), + snoopFanout(this, "snoop_fanout", "Request fanout histogram") { // create the ports based on the size of the master and slave // vector ports, and the presence of the default port, the ports @@ -65,9 +76,9 @@ CoherentXBar::CoherentXBar(const CoherentXBarParams *p) MasterPort* bp = new CoherentXBarMasterPort(portName, *this, i); masterPorts.push_back(bp); reqLayers.push_back(new ReqLayer(*bp, *this, - csprintf(".reqLayer%d", i))); - snoopLayers.push_back(new SnoopRespLayer(*bp, *this, - csprintf(".snoopLayer%d", i))); + csprintf("reqLayer%d", i))); + snoopLayers.push_back( + new SnoopRespLayer(*bp, *this, csprintf("snoopLayer%d", i))); } // see if we have a default slave device connected and if so add @@ -76,29 +87,24 @@ CoherentXBar::CoherentXBar(const CoherentXBarParams *p) defaultPortID = masterPorts.size(); std::string portName = name() + ".default"; MasterPort* bp = new CoherentXBarMasterPort(portName, *this, - defaultPortID); + defaultPortID); masterPorts.push_back(bp); - reqLayers.push_back(new ReqLayer(*bp, *this, csprintf(".reqLayer%d", - defaultPortID))); + reqLayers.push_back(new ReqLayer(*bp, *this, csprintf("reqLayer%d", + defaultPortID))); snoopLayers.push_back(new SnoopRespLayer(*bp, *this, - csprintf(".snoopLayer%d", + csprintf("snoopLayer%d", defaultPortID))); } // create the slave ports, once again starting at zero for (int i = 0; i < p->port_slave_connection_count; ++i) { std::string portName = csprintf("%s.slave[%d]", name(), i); - SlavePort* bp = new CoherentXBarSlavePort(portName, *this, i); + QueuedSlavePort* bp = new CoherentXBarSlavePort(portName, *this, i); slavePorts.push_back(bp); respLayers.push_back(new RespLayer(*bp, *this, - csprintf(".respLayer%d", i))); + csprintf("respLayer%d", i))); snoopRespPorts.push_back(new SnoopRespPort(*bp, *this)); } - - if (snoopFilter) - snoopFilter->setSlavePorts(slavePorts); - - clearPortCache(); } CoherentXBar::~CoherentXBar() @@ -116,7 +122,6 @@ CoherentXBar::~CoherentXBar() void CoherentXBar::init() { - // the base class is responsible for determining the block size BaseXBar::init(); // iterate over our slave ports and determine which of our @@ -124,14 +129,18 @@ CoherentXBar::init() for (const auto& p: slavePorts) { // check if the connected master port is snooping if (p->isSnooping()) { - DPRINTF(AddrRanges, "Adding snooping master %s\n", - p->getMasterPort().name()); + DPRINTF(AddrRanges, "Adding snooping master %s\n", p->getPeer()); snoopPorts.push_back(p); } } if (snoopPorts.empty()) warn("CoherentXBar %s has no snooping ports attached!\n", name()); + + // inform the snoop filter about the slave ports so it can create + // its own internal representation + if (snoopFilter) + snoopFilter->setSlavePorts(slavePorts); } bool @@ -142,94 +151,173 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) // remember if the packet is an express snoop bool is_express_snoop = pkt->isExpressSnoop(); - bool is_inhibited = pkt->memInhibitAsserted(); + bool cache_responding = pkt->cacheResponding(); // for normal requests, going downstream, the express snoop flag - // and the inhibited flag should always be the same - assert(is_express_snoop == is_inhibited); + // and the cache responding flag should always be the same + assert(is_express_snoop == cache_responding); - // determine the destination based on the address - PortID master_port_id = findPort(pkt->getAddr()); + // determine the destination based on the destination address range + PortID master_port_id = findPort(pkt->getAddrRange()); // test if the crossbar should be considered occupied for the current // port, and exclude express snoops from the check if (!is_express_snoop && !reqLayers[master_port_id]->tryTiming(src_port)) { - DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x BUSY\n", - src_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__, + src_port->name(), pkt->print()); return false; } - DPRINTF(CoherentXBar, "recvTimingReq: src %s %s expr %d 0x%x\n", - src_port->name(), pkt->cmdString(), is_express_snoop, - pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + src_port->name(), pkt->print()); // store size and command as they might be modified when // forwarding the packet unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // store the old header delay so we can restore it if needed + Tick old_header_delay = pkt->headerDelay; + + // a request sees the frontend and forward latency + Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long to be crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; + + // is this the destination point for this packet? (e.g. true if + // this xbar is the PoC for a cache maintenance operation to the + // PoC) otherwise the destination is any cache that can satisfy + // the request + const bool is_destination = isDestination(pkt); + + const bool snoop_caches = !system->bypassCaches() && + pkt->cmd != MemCmd::WriteClean; + if (snoop_caches) { + assert(pkt->snoopDelay == 0); + + if (pkt->isClean() && !is_destination) { + // before snooping we need to make sure that the memory + // below is not busy and the cache clean request can be + // forwarded to it + if (!masterPorts[master_port_id]->tryTiming(pkt)) { + DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__, + src_port->name(), pkt->print()); + + // update the layer state and schedule an idle event + reqLayers[master_port_id]->failedTiming(src_port, + clockEdge(Cycles(1))); + return false; + } + } + - // uncacheable requests need never be snooped - if (!pkt->req->isUncacheable() && !system->bypassCaches()) { // the packet is a memory-mapped request and should be // broadcasted to our snoopers but the source if (snoopFilter) { // check with the snoop filter where to forward this packet auto sf_res = snoopFilter->lookupRequest(pkt, *src_port); - packetFinishTime += sf_res.second * clockPeriod(); - DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x"\ - " SF size: %i lat: %i\n", src_port->name(), - pkt->cmdString(), pkt->getAddr(), sf_res.first.size(), - sf_res.second); - forwardTiming(pkt, slave_port_id, sf_res.first); + // the time required by a packet to be delivered through + // the xbar has to be charged also with to lookup latency + // of the snoop filter + pkt->headerDelay += sf_res.second * clockPeriod(); + DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n", + __func__, src_port->name(), pkt->print(), + sf_res.first.size(), sf_res.second); + + if (pkt->isEviction()) { + // for block-evicting packets, i.e. writebacks and + // clean evictions, there is no need to snoop up, as + // all we do is determine if the block is cached or + // not, instead just set it here based on the snoop + // filter result + if (!sf_res.first.empty()) + pkt->setBlockCached(); + } else { + forwardTiming(pkt, slave_port_id, sf_res.first); + } } else { forwardTiming(pkt, slave_port_id); } + + // add the snoop delay to our header delay, and then reset it + pkt->headerDelay += pkt->snoopDelay; + pkt->snoopDelay = 0; } - // remember if the packet will generate a snoop response - const bool expect_snoop_resp = !is_inhibited && pkt->memInhibitAsserted(); - const bool expect_response = pkt->needsResponse() && - !pkt->memInhibitAsserted(); + // set up a sensible starting point + bool success = true; + + // remember if the packet will generate a snoop response by + // checking if a cache set the cacheResponding flag during the + // snooping above + const bool expect_snoop_resp = !cache_responding && pkt->cacheResponding(); + bool expect_response = pkt->needsResponse() && !pkt->cacheResponding(); + + const bool sink_packet = sinkPacket(pkt); + + // in certain cases the crossbar is responsible for responding + bool respond_directly = false; + // store the original address as an address mapper could possibly + // modify the address upon a sendTimingRequest + const Addr addr(pkt->getAddr()); + if (sink_packet) { + DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__, + pkt->print()); + } else { + // determine if we are forwarding the packet, or responding to + // it + if (forwardPacket(pkt)) { + // if we are passing on, rather than sinking, a packet to + // which an upstream cache has committed to responding, + // the line was needs writable, and the responding only + // had an Owned copy, so we need to immidiately let the + // downstream caches know, bypass any flow control + if (pkt->cacheResponding()) { + pkt->setExpressSnoop(); + } - // Note: Cannot create a copy of the full packet, here. - MemCmd orig_cmd(pkt->cmd); + // make sure that the write request (e.g., WriteClean) + // will stop at the memory below if this crossbar is its + // destination + if (pkt->isWrite() && is_destination) { + pkt->clearWriteThrough(); + } - // since it is a normal request, attempt to send the packet - bool success = masterPorts[master_port_id]->sendTimingReq(pkt); + // since it is a normal request, attempt to send the packet + success = masterPorts[master_port_id]->sendTimingReq(pkt); + } else { + // no need to forward, turn this packet around and respond + // directly + assert(pkt->needsResponse()); - if (snoopFilter && !pkt->req->isUncacheable() - && !system->bypassCaches()) { - // The packet may already be overwritten by the sendTimingReq function. - // The snoop filter needs to see the original request *and* the return - // status of the send operation, so we need to recreate the original - // request. Atomic mode does not have the issue, as there the send - // operation and the response happen instantaneously and don't need two - // phase tracking. - MemCmd tmp_cmd(pkt->cmd); - pkt->cmd = orig_cmd; + respond_directly = true; + assert(!expect_snoop_resp); + expect_response = false; + } + } + + if (snoopFilter && snoop_caches) { // Let the snoop filter know about the success of the send operation - snoopFilter->updateRequest(pkt, *src_port, !success); - pkt->cmd = tmp_cmd; + snoopFilter->finishRequest(!success, addr, pkt->isSecure()); } // check if we were successful in sending the packet onwards if (!success) { - // express snoops and inhibited packets should never be forced - // to retry + // express snoops should never be forced to retry assert(!is_express_snoop); - assert(!pkt->memInhibitAsserted()); - // undo the calculation so we can check for 0 again - pkt->headerDelay = pkt->payloadDelay = 0; + // restore the header delay + pkt->headerDelay = old_header_delay; - DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n", - src_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__, + src_port->name(), pkt->print()); // update the layer state and schedule an idle event reqLayers[master_port_id]->failedTiming(src_port, - clockEdge(headerCycles)); + clockEdge(Cycles(1))); } else { // express snoops currently bypass the crossbar state entirely if (!is_express_snoop) { @@ -242,8 +330,9 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) outstandingSnoop.insert(pkt->req); // basic sanity check on the outstanding snoops - panic_if(outstandingSnoop.size() > 512, - "Outstanding snoop requests exceeded 512\n"); + panic_if(outstandingSnoop.size() > maxOutstandingSnoopCheck, + "%s: Outstanding snoop requests exceeded %d\n", + name(), maxOutstandingSnoopCheck); } // remember where to route the normal response to @@ -251,8 +340,9 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) assert(routeTo.find(pkt->req) == routeTo.end()); routeTo[pkt->req] = slave_port_id; - panic_if(routeTo.size() > 512, - "Routing table exceeds 512 packets\n"); + panic_if(routeTo.size() > maxRoutingTableSizeCheck, + "%s: Routing table exceeds %d packets\n", + name(), maxRoutingTableSizeCheck); } // update the layer state and schedule an idle event @@ -264,8 +354,87 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) pktSize[slave_port_id][master_port_id] += pkt_size; transDist[pkt_cmd]++; - if (is_express_snoop) + if (is_express_snoop) { snoops++; + snoopTraffic += pkt_size; + } + } + + if (sink_packet) + // queue the packet for deletion + pendingDelete.reset(pkt); + + // normally we respond to the packet we just received if we need to + PacketPtr rsp_pkt = pkt; + PortID rsp_port_id = slave_port_id; + + // If this is the destination of the cache clean operation the + // crossbar is responsible for responding. This crossbar will + // respond when the cache clean is complete. A cache clean + // is complete either: + // * direcly, if no cache above had a dirty copy of the block + // as indicated by the satisfied flag of the packet, or + // * when the crossbar has seen both the cache clean request + // (CleanSharedReq, CleanInvalidReq) and the corresponding + // write (WriteClean) which updates the block in the memory + // below. + if (success && + ((pkt->isClean() && pkt->satisfied()) || + pkt->cmd == MemCmd::WriteClean) && + is_destination) { + PacketPtr deferred_rsp = pkt->isWrite() ? nullptr : pkt; + auto cmo_lookup = outstandingCMO.find(pkt->id); + if (cmo_lookup != outstandingCMO.end()) { + // the cache clean request has already reached this xbar + respond_directly = true; + if (pkt->isWrite()) { + rsp_pkt = cmo_lookup->second; + assert(rsp_pkt); + + // determine the destination + const auto route_lookup = routeTo.find(rsp_pkt->req); + assert(route_lookup != routeTo.end()); + rsp_port_id = route_lookup->second; + assert(rsp_port_id != InvalidPortID); + assert(rsp_port_id < respLayers.size()); + // remove the request from the routing table + routeTo.erase(route_lookup); + } + outstandingCMO.erase(cmo_lookup); + } else { + respond_directly = false; + outstandingCMO.emplace(pkt->id, deferred_rsp); + if (!pkt->isWrite()) { + assert(routeTo.find(pkt->req) == routeTo.end()); + routeTo[pkt->req] = slave_port_id; + + panic_if(routeTo.size() > maxRoutingTableSizeCheck, + "%s: Routing table exceeds %d packets\n", + name(), maxRoutingTableSizeCheck); + } + } + } + + + if (respond_directly) { + assert(rsp_pkt->needsResponse()); + assert(success); + + rsp_pkt->makeResponse(); + + if (snoopFilter && !system->bypassCaches()) { + // let the snoop filter inspect the response and update its state + snoopFilter->updateResponse(rsp_pkt, *slavePorts[rsp_port_id]); + } + + // we send the response after the current packet, even if the + // response is not for this packet (e.g. cache clean operation + // where both the request and the write packet have to cross + // the destination xbar before the response is sent.) + Tick response_time = clockEdge() + pkt->headerDelay; + rsp_pkt->headerDelay = 0; + + slavePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time); } return success; @@ -287,33 +456,38 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id) // test if the crossbar should be considered occupied for the // current port if (!respLayers[slave_port_id]->tryTiming(src_port)) { - DPRINTF(CoherentXBar, "recvTimingResp: src %s %s 0x%x BUSY\n", - src_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__, + src_port->name(), pkt->print()); return false; } - DPRINTF(CoherentXBar, "recvTimingResp: src %s %s 0x%x\n", - src_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + src_port->name(), pkt->print()); // store size and command as they might be modified when // forwarding the packet unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // a response sees the response latency + Tick xbar_delay = responseLatency * clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); - if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) { + // determine how long to be crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; + + if (snoopFilter && !system->bypassCaches()) { // let the snoop filter inspect the response and update its state snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]); } - // send the packet through the destination slave port - bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt); - - // currently it is illegal to block responses... can lead to - // deadlock - assert(success); + // send the packet through the destination slave port and pay for + // any outstanding header delay + Tick latency = pkt->headerDelay; + pkt->headerDelay = 0; + slavePorts[slave_port_id]->schedTimingResp(pkt, curTick() + latency); // remove the request from the routing table routeTo.erase(route_lookup); @@ -331,28 +505,38 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id) void CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id) { - DPRINTF(CoherentXBar, "recvTimingSnoopReq: src %s %s 0x%x\n", - masterPorts[master_port_id]->name(), pkt->cmdString(), - pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + masterPorts[master_port_id]->name(), pkt->print()); // update stats here as we know the forwarding will succeed + unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; transDist[pkt->cmdToIndex()]++; snoops++; + snoopTraffic += pkt_size; // we should only see express snoops from caches assert(pkt->isExpressSnoop()); - // remeber if the packet is inhibited so we can see if it changes - const bool is_inhibited = pkt->memInhibitAsserted(); + // set the packet header and payload delay, for now use forward latency + // @todo Assess the choice of latency further + calcPacketTiming(pkt, forwardLatency * clockPeriod()); + + // remember if a cache has already committed to responding so we + // can see if it changes during the snooping + const bool cache_responding = pkt->cacheResponding(); + + assert(pkt->snoopDelay == 0); if (snoopFilter) { // let the Snoop Filter work its magic and guide probing auto sf_res = snoopFilter->lookupSnoop(pkt); - // No timing here: packetFinishTime += sf_res.second * clockPeriod(); - DPRINTF(CoherentXBar, "recvTimingSnoopReq: src %s %s 0x%x"\ - " SF size: %i lat: %i\n", masterPorts[master_port_id]->name(), - pkt->cmdString(), pkt->getAddr(), sf_res.first.size(), - sf_res.second); + // the time required by a packet to be delivered through + // the xbar has to be charged also with to lookup latency + // of the snoop filter + pkt->headerDelay += sf_res.second * clockPeriod(); + DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n", + __func__, masterPorts[master_port_id]->name(), pkt->print(), + sf_res.first.size(), sf_res.second); // forward to all snoopers forwardTiming(pkt, InvalidPortID, sf_res.first); @@ -360,8 +544,12 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id) forwardTiming(pkt, InvalidPortID); } + // add the snoop delay to our header delay, and then reset it + pkt->headerDelay += pkt->snoopDelay; + pkt->snoopDelay = 0; + // if we can expect a response, remember how to route it - if (!is_inhibited && pkt->memInhibitAsserted()) { + if (!cache_responding && pkt->cacheResponding()) { assert(routeTo.find(pkt->req) == routeTo.end()); routeTo[pkt->req] = master_port_id; } @@ -371,7 +559,7 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id) // device responsible for the address range something is // wrong, hence there is nothing further to do as the packet // would be going back to where it came from - assert(master_port_id == findPort(pkt->getAddr())); + assert(findPort(pkt->getAddrRange()) == master_port_id); } bool @@ -400,8 +588,8 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) if (forwardAsSnoop) { assert(dest_port_id < snoopLayers.size()); if (!snoopLayers[dest_port_id]->tryTiming(src_port)) { - DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x BUSY\n", - src_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__, + src_port->name(), pkt->print()); return false; } } else { @@ -409,14 +597,14 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) MasterPort* snoop_port = snoopRespPorts[slave_port_id]; assert(dest_port_id < respLayers.size()); if (!respLayers[dest_port_id]->tryTiming(snoop_port)) { - DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x BUSY\n", - snoop_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__, + snoop_port->name(), pkt->print()); return false; } } - DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x\n", - src_port->name(), pkt->cmdString(), pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + src_port->name(), pkt->print()); // store size and command as they might be modified when // forwarding the packet @@ -426,8 +614,17 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) // responses are never express snoops assert(!pkt->isExpressSnoop()); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // a snoop response sees the snoop response latency, and if it is + // forwarded as a normal response, the response latency + Tick xbar_delay = + (forwardAsSnoop ? snoopResponseLatency : responseLatency) * + clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long to be crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; // forward it either as a snoop response or a normal response if (forwardAsSnoop) { @@ -466,23 +663,15 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) *slavePorts[dest_port_id]); } - DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x"\ - " FWD RESP\n", src_port->name(), pkt->cmdString(), - pkt->getAddr()); + DPRINTF(CoherentXBar, "%s: src %s packet %s FWD RESP\n", __func__, + src_port->name(), pkt->print()); // as a normal response, it should go back to a master through - // one of our slave ports, at this point we are ignoring the - // fact that the response layer could be busy and do not touch - // its state - bool success M5_VAR_USED = - slavePorts[dest_port_id]->sendTimingResp(pkt); - - // @todo Put the response in an internal FIFO and pass it on - // to the response layer from there - - // currently it is illegal to block responses... can lead - // to deadlock - assert(success); + // one of our slave ports, we also pay for any outstanding + // header latency + Tick latency = pkt->headerDelay; + pkt->headerDelay = 0; + slavePorts[dest_port_id]->schedTimingResp(pkt, curTick() + latency); respLayers[dest_port_id]->succeededTiming(packetFinishTime); } @@ -493,6 +682,7 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) // stats updates transDist[pkt_cmd]++; snoops++; + snoopTraffic += pkt_size; return true; } @@ -500,10 +690,9 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) void CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id, - const std::vector& dests) + const std::vector& dests) { - DPRINTF(CoherentXBar, "%s for %s address %x size %d\n", __func__, - pkt->cmdString(), pkt->getAddr(), pkt->getSize()); + DPRINTF(CoherentXBar, "%s for %s\n", __func__, pkt->print()); // snoops should only happen if the system isn't bypassing caches assert(!system->bypassCaches()); @@ -537,11 +726,11 @@ CoherentXBar::recvReqRetry(PortID master_port_id) } Tick -CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) +CoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID slave_port_id, + MemBackdoorPtr *backdoor) { - DPRINTF(CoherentXBar, "recvAtomic: packet src %s addr 0x%x cmd %s\n", - slavePorts[slave_port_id]->name(), pkt->getAddr(), - pkt->cmdString()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + slavePorts[slave_port_id]->name(), pkt->print()); unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); @@ -549,8 +738,15 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) MemCmd snoop_response_cmd = MemCmd::InvalidCmd; Tick snoop_response_latency = 0; - // uncacheable requests need never be snooped - if (!pkt->req->isUncacheable() && !system->bypassCaches()) { + // is this the destination point for this packet? (e.g. true if + // this xbar is the PoC for a cache maintenance operation to the + // PoC) otherwise the destination is any cache that can satisfy + // the request + const bool is_destination = isDestination(pkt); + + const bool snoop_caches = !system->bypassCaches() && + pkt->cmd != MemCmd::WriteClean; + if (snoop_caches) { // forward to all snoopers but the source std::pair snoop_result; if (snoopFilter) { @@ -558,12 +754,28 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) auto sf_res = snoopFilter->lookupRequest(pkt, *slavePorts[slave_port_id]); snoop_response_latency += sf_res.second * clockPeriod(); - DPRINTF(CoherentXBar, "%s: src %s %s 0x%x"\ - " SF size: %i lat: %i\n", __func__, - slavePorts[slave_port_id]->name(), pkt->cmdString(), - pkt->getAddr(), sf_res.first.size(), sf_res.second); - snoop_result = forwardAtomic(pkt, slave_port_id, InvalidPortID, - sf_res.first); + DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n", + __func__, slavePorts[slave_port_id]->name(), pkt->print(), + sf_res.first.size(), sf_res.second); + + // let the snoop filter know about the success of the send + // operation, and do it even before sending it onwards to + // avoid situations where atomic upward snoops sneak in + // between and change the filter state + snoopFilter->finishRequest(false, pkt->getAddr(), pkt->isSecure()); + + if (pkt->isEviction()) { + // for block-evicting packets, i.e. writebacks and + // clean evictions, there is no need to snoop up, as + // all we do is determine if the block is cached or + // not, instead just set it here based on the snoop + // filter result + if (!sf_res.first.empty()) + pkt->setBlockCached(); + } else { + snoop_result = forwardAtomic(pkt, slave_port_id, InvalidPortID, + sf_res.first); + } } else { snoop_result = forwardAtomic(pkt, slave_port_id); } @@ -571,21 +783,48 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) snoop_response_latency += snoop_result.second; } + // set up a sensible default value + Tick response_latency = 0; + + const bool sink_packet = sinkPacket(pkt); + // even if we had a snoop response, we must continue and also // perform the actual request at the destination - PortID master_port_id = findPort(pkt->getAddr()); + PortID master_port_id = findPort(pkt->getAddrRange()); + + if (sink_packet) { + DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__, + pkt->print()); + } else { + if (forwardPacket(pkt)) { + // make sure that the write request (e.g., WriteClean) + // will stop at the memory below if this crossbar is its + // destination + if (pkt->isWrite() && is_destination) { + pkt->clearWriteThrough(); + } + + // forward the request to the appropriate destination + auto master = masterPorts[master_port_id]; + response_latency = backdoor ? + master->sendAtomicBackdoor(pkt, *backdoor) : + master->sendAtomic(pkt); + } else { + // if it does not need a response we sink the packet above + assert(pkt->needsResponse()); + + pkt->makeResponse(); + } + } // stats updates for the request pktCount[slave_port_id][master_port_id]++; pktSize[slave_port_id][master_port_id] += pkt_size; transDist[pkt_cmd]++; - // forward the request to the appropriate destination - Tick response_latency = masterPorts[master_port_id]->sendAtomic(pkt); - // Lower levels have replied, tell the snoop filter - if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches() && - pkt->isResponse()) { + // if lower levels have replied, tell the snoop filter + if (!system->bypassCaches() && snoopFilter && pkt->isResponse()) { snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]); } @@ -597,6 +836,30 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) response_latency = snoop_response_latency; } + // If this is the destination of the cache clean operation the + // crossbar is responsible for responding. This crossbar will + // respond when the cache clean is complete. An atomic cache clean + // is complete when the crossbars receives the cache clean + // request (CleanSharedReq, CleanInvalidReq), as either: + // * no cache above had a dirty copy of the block as indicated by + // the satisfied flag of the packet, or + // * the crossbar has already seen the corresponding write + // (WriteClean) which updates the block in the memory below. + if (pkt->isClean() && isDestination(pkt) && pkt->satisfied()) { + auto it = outstandingCMO.find(pkt->id); + assert(it != outstandingCMO.end()); + // we are responding right away + outstandingCMO.erase(it); + } else if (pkt->cmd == MemCmd::WriteClean && isDestination(pkt)) { + // if this is the destination of the operation, the xbar + // sends the responce to the cache clean operation only + // after having encountered the cache clean request + auto M5_VAR_USED ret = outstandingCMO.emplace(pkt->id, nullptr); + // in atomic mode we know that the WriteClean packet should + // precede the clean request + assert(ret.second); + } + // add the response data if (pkt->isResponse()) { pkt_size = pkt->hasData() ? pkt->getSize() : 0; @@ -616,12 +879,13 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) Tick CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) { - DPRINTF(CoherentXBar, "recvAtomicSnoop: packet src %s addr 0x%x cmd %s\n", - masterPorts[master_port_id]->name(), pkt->getAddr(), - pkt->cmdString()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + masterPorts[master_port_id]->name(), pkt->print()); // add the request snoop data + unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; snoops++; + snoopTraffic += pkt_size; // forward to all snoopers std::pair snoop_result; @@ -629,9 +893,9 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) if (snoopFilter) { auto sf_res = snoopFilter->lookupSnoop(pkt); snoop_response_latency += sf_res.second * clockPeriod(); - DPRINTF(CoherentXBar, "%s: src %s %s 0x%x SF size: %i lat: %i\n", - __func__, masterPorts[master_port_id]->name(), pkt->cmdString(), - pkt->getAddr(), sf_res.first.size(), sf_res.second); + DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n", + __func__, masterPorts[master_port_id]->name(), pkt->print(), + sf_res.first.size(), sf_res.second); snoop_result = forwardAtomic(pkt, InvalidPortID, master_port_id, sf_res.first); } else { @@ -656,7 +920,7 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) std::pair CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id, PortID source_master_port_id, - const std::vector& dests) + const std::vector& dests) { // the packet may be changed on snoops, record the original // command to enable us to restore it between snoops so that @@ -690,7 +954,7 @@ CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id, // response from snoop agent assert(pkt->cmd != orig_cmd); - assert(pkt->memInhibitAsserted()); + assert(pkt->cacheResponding()); // should only happen once assert(snoop_response_cmd == MemCmd::InvalidCmd); // save response state @@ -729,14 +993,11 @@ CoherentXBar::recvFunctional(PacketPtr pkt, PortID slave_port_id) { if (!pkt->isPrint()) { // don't do DPRINTFs on PrintReq as it clutters up the output - DPRINTF(CoherentXBar, - "recvFunctional: packet src %s addr 0x%x cmd %s\n", - slavePorts[slave_port_id]->name(), pkt->getAddr(), - pkt->cmdString()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + slavePorts[slave_port_id]->name(), pkt->print()); } - // uncacheable requests need never be snooped - if (!pkt->req->isUncacheable() && !system->bypassCaches()) { + if (!system->bypassCaches()) { // forward to all snoopers but the source forwardFunctional(pkt, slave_port_id); } @@ -744,7 +1005,19 @@ CoherentXBar::recvFunctional(PacketPtr pkt, PortID slave_port_id) // there is no need to continue if the snooping has found what we // were looking for and the packet is already a response if (!pkt->isResponse()) { - PortID dest_id = findPort(pkt->getAddr()); + // since our slave ports are queued ports we need to check them as well + for (const auto& p : slavePorts) { + // if we find a response that has the data, then the + // downstream caches/memories may be out of date, so simply stop + // here + if (p->trySatisfyFunctional(pkt)) { + if (pkt->needsResponse()) + pkt->makeResponse(); + return; + } + } + + PortID dest_id = findPort(pkt->getAddrRange()); masterPorts[dest_id]->sendFunctional(pkt); } @@ -755,10 +1028,16 @@ CoherentXBar::recvFunctionalSnoop(PacketPtr pkt, PortID master_port_id) { if (!pkt->isPrint()) { // don't do DPRINTFs on PrintReq as it clutters up the output - DPRINTF(CoherentXBar, - "recvFunctionalSnoop: packet src %s addr 0x%x cmd %s\n", - masterPorts[master_port_id]->name(), pkt->getAddr(), - pkt->cmdString()); + DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__, + masterPorts[master_port_id]->name(), pkt->print()); + } + + for (const auto& p : slavePorts) { + if (p->trySatisfyFunctional(pkt)) { + if (pkt->needsResponse()) + pkt->makeResponse(); + return; + } } // forward to all snoopers @@ -787,42 +1066,51 @@ CoherentXBar::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id) } } -unsigned int -CoherentXBar::drain(DrainManager *dm) +bool +CoherentXBar::sinkPacket(const PacketPtr pkt) const { - // sum up the individual layers - unsigned int total = 0; - for (auto l: reqLayers) - total += l->drain(dm); - for (auto l: respLayers) - total += l->drain(dm); - for (auto l: snoopLayers) - total += l->drain(dm); - return total; + // we can sink the packet if: + // 1) the crossbar is the point of coherency, and a cache is + // responding after being snooped + // 2) the crossbar is the point of coherency, and the packet is a + // coherency packet (not a read or a write) that does not + // require a response + // 3) this is a clean evict or clean writeback, but the packet is + // found in a cache above this crossbar + // 4) a cache is responding after being snooped, and the packet + // either does not need the block to be writable, or the cache + // that has promised to respond (setting the cache responding + // flag) is providing writable and thus had a Modified block, + // and no further action is needed + return (pointOfCoherency && pkt->cacheResponding()) || + (pointOfCoherency && !(pkt->isRead() || pkt->isWrite()) && + !pkt->needsResponse()) || + (pkt->isCleanEviction() && pkt->isBlockCached()) || + (pkt->cacheResponding() && + (!pkt->needsWritable() || pkt->responderHadWritable())); +} + +bool +CoherentXBar::forwardPacket(const PacketPtr pkt) +{ + // we are forwarding the packet if: + // 1) this is a cache clean request to the PoU/PoC and this + // crossbar is above the PoU/PoC + // 2) this is a read or a write + // 3) this crossbar is above the point of coherency + if (pkt->isClean()) { + return !isDestination(pkt); + } + return pkt->isRead() || pkt->isWrite() || !pointOfCoherency; } + void CoherentXBar::regStats() { - // register the stats of the base class and our layers BaseXBar::regStats(); - for (auto l: reqLayers) - l->regStats(); - for (auto l: respLayers) - l->regStats(); - for (auto l: snoopLayers) - l->regStats(); - - snoops - .name(name() + ".snoops") - .desc("Total snoops (count)") - ; - - snoopFanout - .init(0, snoopPorts.size(), 1) - .name(name() + ".snoop_fanout") - .desc("Request fanout histogram") - ; + + snoopFanout.init(0, snoopPorts.size(), 1); } CoherentXBar *