From: Marco Balboni Date: Mon, 2 Mar 2015 09:00:46 +0000 (-0500) Subject: mem: Add crossbar latencies X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d35dd71ab4ac44a79ac22dca82277a43cd59f3c6;p=gem5.git mem: Add crossbar latencies This patch introduces latencies in the crossbar that were neglected before. In particular, it adds three parameters to the crossbar model: frontend_latency, forward_latency, and response_latency. Along with these parameters, three corresponding members are added: frontendLatency, forwardLatency, and responseLatency. The coherent crossbar has an additional snoop_response_latency. The latency of the request path through the xbar is set as --> frontendLatency + forwardLatency. In case the snoop filter is enabled, the request path latency is also charged with the look-up latency of the snoop filter: --> frontendLatency + SF(lookupLatency) + forwardLatency. The latency of the response path through the xbar is set instead as --> responseLatency. In case of a snoop response, if the response is treated as a normal response the latency associated is again --> responseLatency; if instead it is forwarded as a snoop response we add an additional variable, snoopResponseLatency, and the latency associated is --> snoopResponseLatency. Furthermore, this patch lets the crossbar progress on the next clock edge after an unused retry, changing the time the crossbar considers itself busy after sending a retry that was not acted upon. --- diff --git a/src/mem/XBar.py b/src/mem/XBar.py index 2aeefe132..64910ed72 100644 --- a/src/mem/XBar.py +++ b/src/mem/XBar.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012, 2015 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -49,10 +49,29 @@ class BaseXBar(MemObject): type = 'BaseXBar' abstract = True cxx_header = "mem/xbar.hh" - slave = VectorSlavePort("vector port for connecting masters") - master = VectorMasterPort("vector port for connecting slaves") - header_cycles = Param.Cycles(1, "cycles of overhead per transaction") - width = Param.Unsigned(8, "xbar width (bytes)") + + slave = VectorSlavePort("Vector port for connecting masters") + master = VectorMasterPort("Vector port for connecting slaves") + + # Latencies governing the time taken for the various paths a + # packet has through the crossbar. Note that the crossbar itself + # does not add the latency due to assumptions in the coherency + # mechanism. Instead the latency is annotated on the packet and + # left to the neighbouring modules. + # + # A request incurs the frontend latency, possibly snoop filter + # lookup latency, and forward latency. A response incurs the + # response latency. Frontend latency encompasses arbitration and + # deciding what to do when a request arrives. The forward latency + # is the latency involved once a decision is made to forward the + # request. The response latency is similar to the forward + # latency, but for responses rather than requests. + frontend_latency = Param.Cycles(3, "Frontend latency") + forward_latency = Param.Cycles(4, "Forward latency") + response_latency = Param.Cycles(2, "Response latency") + + # Width governing the throughput of the crossbar + width = Param.Unsigned(8, "Datapath width per port (bytes)") # The default port can be left unconnected, or be used to connect # a default slave port @@ -74,12 +93,21 @@ class CoherentXBar(BaseXBar): type = 'CoherentXBar' cxx_header = "mem/coherent_xbar.hh" + # The coherent crossbar additionally has snoop responses that are + # forwarded after a specific latency. 
+ snoop_response_latency = Param.Cycles(4, "Snoop response latency") + + # An optional snoop filter + snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter") + system = Param.System(Parent.any, "System that the crossbar belongs to.") - snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter.") class SnoopFilter(SimObject): type = 'SnoopFilter' cxx_header = "mem/snoop_filter.hh" - lookup_latency = Param.Cycles(3, "lookup latency (cycles)") + + # Lookup latency of the snoop filter, added to requests that pass + # through a coherent crossbar. + lookup_latency = Param.Cycles(1, "Lookup latency") system = Param.System(Parent.any, "System that the crossbar belongs to.") diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 667ff96f9..d4188f0f2 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2011-2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -55,7 +55,8 @@ #include "sim/system.hh" CoherentXBar::CoherentXBar(const CoherentXBarParams *p) - : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter) + : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter), + snoopResponseLatency(p->snoop_response_latency) { // create the ports based on the size of the master and slave // vector ports, and the presence of the default port, the ports @@ -167,8 +168,17 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) unsigned int pkt_size = pkt->hasData() ? 
pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // store the old header delay so we can restore it if needed + Tick old_header_delay = pkt->headerDelay; + + // a request sees the frontend and forward latency + Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long the crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; // uncacheable requests need never be snooped if (!pkt->req->isUncacheable() && !system->bypassCaches()) { @@ -177,6 +187,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) if (snoopFilter) { // check with the snoop filter where to forward this packet auto sf_res = snoopFilter->lookupRequest(pkt, *src_port); + // If SnoopFilter is enabled, the total time required by a packet + // to be delivered through the xbar has to be charged also with + // the lookup latency of the snoop filter (sf_res.second). 
+ pkt->headerDelay += sf_res.second * clockPeriod(); packetFinishTime += sf_res.second * clockPeriod(); DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x"\ " SF size: %i lat: %i\n", src_port->name(), @@ -221,15 +235,15 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) assert(!is_express_snoop); assert(!pkt->memInhibitAsserted()); - // undo the calculation so we can check for 0 again - pkt->headerDelay = pkt->payloadDelay = 0; + // restore the header delay + pkt->headerDelay = old_header_delay; DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); // update the layer state and schedule an idle event reqLayers[master_port_id]->failedTiming(src_port, - clockEdge(headerCycles)); + clockEdge(Cycles(1))); } else { // express snoops currently bypass the crossbar state entirely if (!is_express_snoop) { @@ -300,8 +314,14 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id) unsigned int pkt_size = pkt->hasData() ? 
pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // a response sees the response latency + Tick xbar_delay = responseLatency * clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long the crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) { // let the snoop filter inspect the response and update its state @@ -426,8 +446,17 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) // responses are never express snoops assert(!pkt->isExpressSnoop()); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // a snoop response sees the snoop response latency, and if it is + // forwarded as a normal response, the response latency + Tick xbar_delay = + (forwardAsSnoop ? snoopResponseLatency : responseLatency) * + clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long the crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; // forward it either as a snoop response or a normal response if (forwardAsSnoop) { diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh index ffe4a066b..3cf10689c 100644 --- a/src/mem/coherent_xbar.hh +++ b/src/mem/coherent_xbar.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2011-2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -272,6 +272,9 @@ class CoherentXBar : public BaseXBar * broadcast needed for probes. NULL denotes an absent filter. 
*/ SnoopFilter *snoopFilter; + /** Cycles of snoop response latency.*/ + const Cycles snoopResponseLatency; + /** Function called by the port when the crossbar is recieving a Timing request packet.*/ bool recvTimingReq(PacketPtr pkt, PortID slave_port_id); diff --git a/src/mem/noncoherent_xbar.cc b/src/mem/noncoherent_xbar.cc index db33f0f70..e2bc85cad 100644 --- a/src/mem/noncoherent_xbar.cc +++ b/src/mem/noncoherent_xbar.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2011-2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -127,8 +127,17 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // store the old header delay so we can restore it if needed + Tick old_header_delay = pkt->headerDelay; + + // a request sees the frontend and forward latency + Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long the crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; // before forwarding the packet (and possibly altering it), // remember if we are expecting a response @@ -145,12 +154,12 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) DPRINTF(NoncoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); - // undo the calculation so we can check for 0 again - pkt->headerDelay = pkt->payloadDelay = 0; + // restore the header delay as it is additive + pkt->headerDelay = old_header_delay; // occupy until the header is sent reqLayers[master_port_id]->failedTiming(src_port, - clockEdge(headerCycles)); + clockEdge(Cycles(1))); return false; } @@ 
-200,8 +209,14 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id) unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0; unsigned int pkt_cmd = pkt->cmdToIndex(); - calcPacketTiming(pkt); - Tick packetFinishTime = curTick() + pkt->payloadDelay; + // a response sees the response latency + Tick xbar_delay = responseLatency * clockPeriod(); + + // set the packet header and payload delay + calcPacketTiming(pkt, xbar_delay); + + // determine how long the crossbar layer is busy + Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay; // send the packet through the destination slave port bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt); diff --git a/src/mem/noncoherent_xbar.hh b/src/mem/noncoherent_xbar.hh index ba99d9be8..64a1064ab 100644 --- a/src/mem/noncoherent_xbar.hh +++ b/src/mem/noncoherent_xbar.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2011-2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc index 7ac937177..bc649581b 100644 --- a/src/mem/xbar.cc +++ b/src/mem/xbar.cc @@ -56,7 +56,10 @@ BaseXBar::BaseXBar(const BaseXBarParams *p) : MemObject(p), - headerCycles(p->header_cycles), width(p->width), + frontendLatency(p->frontend_latency), + forwardLatency(p->forward_latency), + responseLatency(p->response_latency), + width(p->width), gotAddrRanges(p->port_default_connection_count + p->port_master_connection_count, false), gotAllAddrRanges(false), defaultPortID(InvalidPortID), @@ -102,34 +105,41 @@ BaseXBar::getSlavePort(const std::string &if_name, PortID idx) } void -BaseXBar::calcPacketTiming(PacketPtr pkt) +BaseXBar::calcPacketTiming(PacketPtr pkt, Tick header_delay) { // the crossbar will be called at a time that is not necessarily // coinciding with its own clock, so start by determining how long // until the next clock edge (could be zero) Tick offset = 
clockEdge() - curTick(); - // Determine how many cycles are needed to send the data - // If the packet has no data we take into account just the cycle to send - // the header. - unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0; - - // before setting the bus delay fields of the packet, ensure that - // the delay from any previous crossbar has been accounted for - if (pkt->headerDelay != 0 || pkt->payloadDelay != 0) - panic("Packet %s already has delay (%d, %d) that should be " - "accounted for.\n", pkt->cmdString(), pkt->headerDelay, - pkt->payloadDelay); - - // The headerDelay takes into account the relative time to deliver the - // header of the packet. It will be charged of the additional delay of - // the xbar if the packet goes through it. - pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset; - - // The payloadDelay takes into account the relative time to deliver the - // payload of the packet. If the packet has no data its value is just one - // tick (due to header) plus the offset value. - pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset; + // the header delay depends on the path through the crossbar, and + // we therefore rely on the caller to provide the actual + // value + pkt->headerDelay += offset + header_delay; + + // note that we add the header delay to the existing value, and + // align it to the crossbar clock + + // do a quick sanity check to ensure the timings are not being + // ignored, note that this specific value may cause problems for + // slower interconnects + panic_if(pkt->headerDelay > SimClock::Int::us, + "Encountered header delay exceeding 1 us\n"); + + if (pkt->hasData()) { + // the payloadDelay takes into account the relative time to + // deliver the payload of the packet, after the header delay, + // we take the maximum since the payload delay could already + // be longer than what this particular crossbar enforces. 
+ pkt->payloadDelay = std::max(pkt->payloadDelay, + divCeil(pkt->getSize(), width) * + clockPeriod()); + } + + // the payload delay is not paying for the clock offset as that is + // already done using the header delay, and the payload delay is + // also used to determine how long the crossbar layer is busy and + // thus regulates throughput } template @@ -274,14 +284,15 @@ BaseXBar::Layer::retryWaiting() sendRetry(retryingPort); // If the layer is still in the retry state, sendTiming wasn't - // called in zero time (e.g. the cache does this), burn a cycle + // called in zero time (e.g. the cache does this when a writeback + // is squashed) if (state == RETRY) { // update the state to busy and reset the retrying port, we // have done our bit and sent the retry state = BUSY; - // occupy the crossbar layer until the next cycle ends - occupyLayer(xbar.clockEdge(Cycles(1))); + // occupy the crossbar layer until the next clock edge + occupyLayer(xbar.clockEdge()); } } diff --git a/src/mem/xbar.hh b/src/mem/xbar.hh index f51b08da2..ed678d9d0 100644 --- a/src/mem/xbar.hh +++ b/src/mem/xbar.hh @@ -309,8 +309,15 @@ class BaseXBar : public MemObject { retry_port->sendRetrySnoopResp(); } }; - /** cycles of overhead per transaction */ - const Cycles headerCycles; + /** + * Cycles of front-end pipeline including the delay to accept the request + * and to decode the address. + */ + const Cycles frontendLatency; + /** Cycles of forward latency */ + const Cycles forwardLatency; + /** Cycles of response latency */ + const Cycles responseLatency; /** the width of the xbar in bytes */ const uint32_t width; @@ -404,8 +411,11 @@ class BaseXBar : public MemObject * headerDelay and payloadDelay fields of the packet * object with the relative number of ticks required to transmit * the header and the payload, respectively. 
+ * + * @param pkt Packet to populate with timings + * @param header_delay Header delay to be added */ - void calcPacketTiming(PacketPtr pkt); + void calcPacketTiming(PacketPtr pkt, Tick header_delay); /** * Remember for each of the master ports of the crossbar if we got