From 268d9e59c5e69a00456a40c837b0150a8f3f6bf8 Mon Sep 17 00:00:00 2001 From: Marco Balboni Date: Wed, 11 Feb 2015 10:23:47 -0500 Subject: [PATCH] mem: Clarification of packet crossbar timings This patch clarifies the packet timings annotated when going through a crossbar. The old 'firstWordDelay' is replaced by 'headerDelay' that represents the delay associated with the delivery of the header of the packet. The old 'lastWordDelay' is replaced by 'payloadDelay' that represents the delay needed to process the payload of the packet. For now the uses and values remain identical. However, going forward the payloadDelay will be additive, and not include the headerDelay. Follow-on patches will make the headerDelay capture the pipeline latency incurred in the crossbar, whereas the payloadDelay will capture the additional serialisation delay. --- src/arch/x86/pagetable_walker.cc | 2 +- src/dev/io_device.cc | 2 +- src/dev/pcidev.cc | 2 +- src/dev/x86/intdev.hh | 2 +- src/mem/bridge.cc | 4 ++-- src/mem/cache/cache_impl.hh | 28 ++++++++++++++-------------- src/mem/coherent_xbar.cc | 16 ++++++++-------- src/mem/dram_ctrl.cc | 2 +- src/mem/dramsim2.cc | 2 +- src/mem/external_slave.cc | 4 ++-- src/mem/noncoherent_xbar.cc | 8 ++++---- src/mem/packet.hh | 24 ++++++++++++------------ src/mem/simple_mem.cc | 2 +- src/mem/xbar.cc | 28 ++++++++++++++++------------ src/mem/xbar.hh | 4 ++-- 15 files changed, 67 insertions(+), 63 deletions(-) diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc index f6f8da968..3b82c1bd6 100644 --- a/src/arch/x86/pagetable_walker.cc +++ b/src/arch/x86/pagetable_walker.cc @@ -601,7 +601,7 @@ Walker::WalkerState::recvPacket(PacketPtr pkt) assert(!read); // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; state = nextState; nextState = Ready; diff --git a/src/dev/io_device.cc b/src/dev/io_device.cc index a536e2973..c25a28dd2 100644 --- 
a/src/dev/io_device.cc +++ b/src/dev/io_device.cc @@ -55,7 +55,7 @@ Tick PioPort::recvAtomic(PacketPtr pkt) { // @todo: We need to pay for this and not just zero it out - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; const Tick delay(pkt->isRead() ? device->read(pkt) : device->write(pkt)); assert(pkt->isResponse() || pkt->isError()); diff --git a/src/dev/pcidev.cc b/src/dev/pcidev.cc index 715b67603..40be9716d 100644 --- a/src/dev/pcidev.cc +++ b/src/dev/pcidev.cc @@ -80,7 +80,7 @@ PciDevice::PciConfigPort::recvAtomic(PacketPtr pkt) assert(pkt->getAddr() >= configAddr && pkt->getAddr() < configAddr + PCI_CONFIG_SIZE); // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt); } diff --git a/src/dev/x86/intdev.hh b/src/dev/x86/intdev.hh index 294a2b887..d63e64010 100644 --- a/src/dev/x86/intdev.hh +++ b/src/dev/x86/intdev.hh @@ -82,7 +82,7 @@ class IntDevice Tick recvMessage(PacketPtr pkt) { // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; return device->recvMessage(pkt); } }; diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc index a21d123a1..c003677ba 100644 --- a/src/mem/bridge.cc +++ b/src/mem/bridge.cc @@ -137,7 +137,7 @@ Bridge::BridgeMasterPort::recvTimingResp(PacketPtr pkt) DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size()); // @todo: We need to pay for this and not just zero it out - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; slavePort.schedTimingResp(pkt, bridge.clockEdge(delay)); @@ -181,7 +181,7 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt) if (!retryReq) { // @todo: We need to pay for this and not just zero it out - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; 
masterPort.schedTimingReq(pkt, bridge.clockEdge(delay)); } diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 2fb0baaa4..29285abce 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -419,7 +419,7 @@ Cache::recvTimingSnoopResp(PacketPtr pkt) pkt->popSenderState(); delete rec; // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; // forwardLatency is set here because there is a response from an // upper level cache. memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency)); @@ -486,7 +486,7 @@ Cache::recvTimingReq(PacketPtr pkt) // also reset the bus time that the original packet has // not yet paid for - snoop_pkt->firstWordDelay = snoop_pkt->lastWordDelay = 0; + snoop_pkt->headerDelay = snoop_pkt->payloadDelay = 0; // make this an instantaneous express snoop, and let the // other caches in the system know that the packet is @@ -521,7 +521,7 @@ Cache::recvTimingReq(PacketPtr pkt) uncacheableFlush(pkt); // @todo: someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; // writes go in write buffer, reads use MSHR, // prefetches are acknowledged (responded to) and dropped @@ -579,7 +579,7 @@ Cache::recvTimingReq(PacketPtr pkt) if (needsResponse) { pkt->makeTimingResponse(); // @todo: Make someone pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; // In this case we are considering lat neglecting // responseLatency, modelling hit latency just as @@ -598,7 +598,7 @@ Cache::recvTimingReq(PacketPtr pkt) // miss // @todo: Make someone pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; Addr blk_addr = blockAlign(pkt->getAddr()); MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure()); @@ -1146,8 +1146,8 @@ Cache::recvTimingResp(PacketPtr pkt) // from lower level 
caches/memory to an upper level cache or // the core. completion_time = clockEdge(responseLatency) + - (transfer_offset ? pkt->lastWordDelay : - pkt->firstWordDelay); + (transfer_offset ? pkt->payloadDelay : + pkt->headerDelay); assert(!target->pkt->req->isUncacheable()); @@ -1163,14 +1163,14 @@ Cache::recvTimingResp(PacketPtr pkt) // from lower level caches/memory to an upper level cache or // the core. completion_time = clockEdge(responseLatency) + - pkt->lastWordDelay; + pkt->payloadDelay; target->pkt->req->setExtraData(0); } else { // not a cache fill, just forwarding response // responseLatency is the latency of the return path // from lower level cahces/memory to the core. completion_time = clockEdge(responseLatency) + - pkt->lastWordDelay; + pkt->payloadDelay; if (pkt->isRead() && !is_error) { target->pkt->setData(pkt->getConstPtr()); } @@ -1190,7 +1190,7 @@ Cache::recvTimingResp(PacketPtr pkt) target->pkt->getAddr()); } // reset the bus additional time as it is now accounted for - target->pkt->firstWordDelay = target->pkt->lastWordDelay = 0; + target->pkt->headerDelay = target->pkt->payloadDelay = 0; cpuSidePort->schedTimingResp(target->pkt, completion_time); break; @@ -1239,7 +1239,7 @@ Cache::recvTimingResp(PacketPtr pkt) mq = mshr->queue; mq->markPending(mshr); requestMemSideBus((RequestCause)mq->index, clockEdge() + - pkt->lastWordDelay); + pkt->payloadDelay); } else { mq->deallocate(mshr); if (wasFull && !mq->isFull()) { @@ -1512,7 +1512,7 @@ Cache::handleFill(PacketPtr pkt, BlkType *blk, } // We pay for fillLatency here. 
blk->whenReady = clockEdge() + fillLatency * clockPeriod() + - pkt->lastWordDelay; + pkt->payloadDelay; return blk; } @@ -1548,7 +1548,7 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, assert(req_pkt->isInvalidate() || pkt->sharedAsserted()); pkt->makeTimingResponse(); // @todo Make someone pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; if (pkt->isRead()) { pkt->setDataFromBlock(blk_data, blkSize); } @@ -1599,7 +1599,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, snoopPkt.pushSenderState(new ForwardResponseRecord()); // the snoop packet does not need to wait any additional // time - snoopPkt.firstWordDelay = snoopPkt.lastWordDelay = 0; + snoopPkt.headerDelay = snoopPkt.payloadDelay = 0; cpuSidePort->sendTimingSnoopReq(&snoopPkt); if (snoopPkt.memInhibitAsserted()) { // cache-to-cache response from some upper cache diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc index 02580702b..454de69ce 100644 --- a/src/mem/coherent_xbar.cc +++ b/src/mem/coherent_xbar.cc @@ -168,7 +168,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) unsigned int pkt_cmd = pkt->cmdToIndex(); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->lastWordDelay + curTick(); + Tick packetFinishTime = curTick() + pkt->payloadDelay; // uncacheable requests need never be snooped if (!pkt->req->isUncacheable() && !system->bypassCaches()) { @@ -222,7 +222,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) assert(!pkt->memInhibitAsserted()); // undo the calculation so we can check for 0 again - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n", src_port->name(), pkt->cmdString(), pkt->getAddr()); @@ -301,7 +301,7 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id) unsigned int pkt_cmd = pkt->cmdToIndex(); calcPacketTiming(pkt); - Tick 
packetFinishTime = pkt->lastWordDelay + curTick(); + Tick packetFinishTime = curTick() + pkt->payloadDelay; if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) { // let the snoop filter inspect the response and update its state @@ -427,7 +427,7 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id) assert(!pkt->isExpressSnoop()); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->lastWordDelay + curTick(); + Tick packetFinishTime = curTick() + pkt->payloadDelay; // forward it either as a snoop response or a normal response if (forwardAsSnoop) { @@ -608,8 +608,8 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) transDist[pkt_cmd]++; } - // @todo: Not setting first-word time - pkt->lastWordDelay = response_latency; + // @todo: Not setting header time + pkt->payloadDelay = response_latency; return response_latency; } @@ -648,8 +648,8 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id) snoops++; } - // @todo: Not setting first-word time - pkt->lastWordDelay = snoop_response_latency; + // @todo: Not setting header time + pkt->payloadDelay = snoop_response_latency; return snoop_response_latency; } diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc index e52f03588..f4bea04b0 100644 --- a/src/mem/dram_ctrl.cc +++ b/src/mem/dram_ctrl.cc @@ -879,7 +879,7 @@ DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency) assert(pkt->isResponse()); // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; // queue the packet in the response queue to be sent out after // the static latency has passed diff --git a/src/mem/dramsim2.cc b/src/mem/dramsim2.cc index 218500573..eb20b9486 100644 --- a/src/mem/dramsim2.cc +++ b/src/mem/dramsim2.cc @@ -270,7 +270,7 @@ DRAMSim2::accessAndRespond(PacketPtr pkt) assert(pkt->isResponse()); // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + 
pkt->headerDelay = pkt->payloadDelay = 0; DPRINTF(DRAMSim2, "Queuing response for address %lld\n", pkt->getAddr()); diff --git a/src/mem/external_slave.cc b/src/mem/external_slave.cc index 67800b9a2..a6f72fd71 100644 --- a/src/mem/external_slave.cc +++ b/src/mem/external_slave.cc @@ -124,8 +124,8 @@ void StubSlavePort::ResponseEvent::process() { owner.responsePacket->makeResponse(); - owner.responsePacket->firstWordDelay = 0; - owner.responsePacket->lastWordDelay = 0; + owner.responsePacket->headerDelay = 0; + owner.responsePacket->payloadDelay = 0; if (owner.sendTimingResp(owner.responsePacket)) { owner.responsePacket = NULL; diff --git a/src/mem/noncoherent_xbar.cc b/src/mem/noncoherent_xbar.cc index e93446b47..0cf656f80 100644 --- a/src/mem/noncoherent_xbar.cc +++ b/src/mem/noncoherent_xbar.cc @@ -128,7 +128,7 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) unsigned int pkt_cmd = pkt->cmdToIndex(); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->lastWordDelay + curTick(); + Tick packetFinishTime = curTick() + pkt->payloadDelay; // before forwarding the packet (and possibly altering it), // remember if we are expecting a response @@ -146,7 +146,7 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id) src_port->name(), pkt->cmdString(), pkt->getAddr()); // undo the calculation so we can check for 0 again - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; // occupy until the header is sent reqLayers[master_port_id]->failedTiming(src_port, @@ -201,7 +201,7 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id) unsigned int pkt_cmd = pkt->cmdToIndex(); calcPacketTiming(pkt); - Tick packetFinishTime = pkt->lastWordDelay + curTick(); + Tick packetFinishTime = curTick() + pkt->payloadDelay; // send the packet through the destination slave port bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt); @@ -265,7 +265,7 @@ 
NoncoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id) } // @todo: Not setting first-word time - pkt->lastWordDelay = response_latency; + pkt->payloadDelay = response_latency; return response_latency; } diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 2917262c3..e80307ffc 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2014 ARM Limited + * Copyright (c) 2012-2015 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -313,23 +313,23 @@ class Packet : public Printable public: /** - * The extra delay from seeing the packet until the first word is + * The extra delay from seeing the packet until the header is * transmitted. This delay is used to communicate the crossbar * forwarding latency to the neighbouring object (e.g. a cache) * that actually makes the packet wait. As the delay is relative, * a 32-bit unsigned should be sufficient. */ - uint32_t firstWordDelay; + uint32_t headerDelay; /** - * The extra pipelining delay from seeing the packet until the - * last word is transmitted by the component that provided it (if - * any). This includes the first word delay. Similar to the first - * word delay, this is used to make up for the fact that the + * The extra pipelining delay from seeing the packet until the end of + * payload is transmitted by the component that provided it (if + * any). This includes the header delay. Similar to the header + * delay, this is used to make up for the fact that the * crossbar does not make the packet wait. As the delay is * relative, a 32-bit unsigned should be sufficient. 
*/ - uint32_t lastWordDelay; + uint32_t payloadDelay; /** * A virtual base opaque structure used to hold state associated @@ -574,7 +574,7 @@ class Packet : public Printable Packet(const RequestPtr _req, MemCmd _cmd) : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false), size(0), bytesValidStart(0), bytesValidEnd(0), - firstWordDelay(0), lastWordDelay(0), + headerDelay(0), payloadDelay(0), senderState(NULL) { if (req->hasPaddr()) { @@ -596,7 +596,7 @@ class Packet : public Printable Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize) : cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false), bytesValidStart(0), bytesValidEnd(0), - firstWordDelay(0), lastWordDelay(0), + headerDelay(0), payloadDelay(0), senderState(NULL) { if (req->hasPaddr()) { @@ -621,8 +621,8 @@ class Packet : public Printable addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size), bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd), - firstWordDelay(pkt->firstWordDelay), - lastWordDelay(pkt->lastWordDelay), + headerDelay(pkt->headerDelay), + payloadDelay(pkt->payloadDelay), senderState(pkt->senderState) { if (!clear_flags) diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc index 4e1020de5..bf89e58fd 100644 --- a/src/mem/simple_mem.cc +++ b/src/mem/simple_mem.cc @@ -125,7 +125,7 @@ SimpleMemory::recvTimingReq(PacketPtr pkt) } // @todo someone should pay for this - pkt->firstWordDelay = pkt->lastWordDelay = 0; + pkt->headerDelay = pkt->payloadDelay = 0; // update the release time according to the bandwidth limit, and // do so with respect to the time it takes to finish this request diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc index d56e726d5..e98b10060 100644 --- a/src/mem/xbar.cc +++ b/src/mem/xbar.cc @@ -109,23 +109,27 @@ BaseXBar::calcPacketTiming(PacketPtr pkt) // until the next clock edge (could be zero) Tick offset = clockEdge() - curTick(); - // determine how many cycles are needed to send the data + // Determine how many cycles are 
needed to send the data + // If the packet has no data we take into account just the cycle to send + // the header. unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0; // before setting the bus delay fields of the packet, ensure that // the delay from any previous crossbar has been accounted for - if (pkt->firstWordDelay != 0 || pkt->lastWordDelay != 0) + if (pkt->headerDelay != 0 || pkt->payloadDelay != 0) panic("Packet %s already has delay (%d, %d) that should be " - "accounted for.\n", pkt->cmdString(), pkt->firstWordDelay, - pkt->lastWordDelay); - - // The first word will be delivered on the cycle after the header. - pkt->firstWordDelay = (headerCycles + 1) * clockPeriod() + offset; - - // Note that currently lastWordDelay can be smaller than - // firstWordDelay if the packet has no data - pkt->lastWordDelay = (headerCycles + dataCycles) * clockPeriod() + - offset; + "accounted for.\n", pkt->cmdString(), pkt->headerDelay, + pkt->payloadDelay); + + // The headerDelay takes into account the relative time to deliver the + // header of the packet. It will be charged of the additional delay of + // the xbar if the packet goes through it. + pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset; + + // The payloadDelay takes into account the relative time to deliver the + // payload of the packet. If the packet has no data its value is just the + // header cycles (headerCycles * clockPeriod()) plus the offset value. + pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset; } template diff --git a/src/mem/xbar.hh b/src/mem/xbar.hh index 6e7d7afce..81b16c19d 100644 --- a/src/mem/xbar.hh +++ b/src/mem/xbar.hh @@ -333,9 +333,9 @@ class BaseXBar : public MemObject /** * Calculate the timing parameters for the packet. 
Updates the - * firstWordDelay and lastWordDelay fields of the packet + * headerDelay and payloadDelay fields of the packet * object with the relative number of ticks required to transmit - * the header and the first word, and the last word, respectively. + * the header and the payload, respectively. */ void calcPacketTiming(PacketPtr pkt); -- 2.30.2