From d4ef8368aa1dfb5e1e1ebe155c0fce1070046f83 Mon Sep 17 00:00:00 2001 From: Marco Balboni Date: Mon, 2 Mar 2015 04:00:48 -0500 Subject: [PATCH] mem: Downstream components consumes new crossbar delays This patch makes the caches and memory controllers consume the delay that is annotated to a packet by the crossbar. Previously many components simply threw these delays away. Note that the devices still do not pay for these delays. --- src/mem/cache/cache_impl.hh | 140 +++++++++++++++++++++++------------- src/mem/dram_ctrl.cc | 11 ++- src/mem/dramsim2.cc | 8 ++- 3 files changed, 103 insertions(+), 56 deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 32eae66d9..73b23d637 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -417,12 +417,14 @@ Cache::recvTimingSnoopResp(PacketPtr pkt) pkt->popSenderState(); delete rec; - // @todo someone should pay for this - pkt->headerDelay = pkt->payloadDelay = 0; // forwardLatency is set here because there is a response from an // upper level cache. - memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency)); - + // To pay the delay that occurs if the packet comes from the bus, + // we charge also headerDelay. + Tick snoop_resp_time = clockEdge(forwardLatency) + pkt->headerDelay; + // Reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; + memSidePort->schedTimingSnoopResp(pkt, snoop_resp_time); } template @@ -519,31 +521,41 @@ Cache::recvTimingReq(PacketPtr pkt) if (pkt->req->isUncacheable()) { uncacheableFlush(pkt); - // @todo: someone should pay for this - pkt->headerDelay = pkt->payloadDelay = 0; - // writes go in write buffer, reads use MSHR, // prefetches are acknowledged (responded to) and dropped if (pkt->cmd.isPrefetch()) { // prefetching (cache loading) uncacheable data is nonsensical pkt->makeTimingResponse(); std::memset(pkt->getPtr(), 0xFF, pkt->getSize()); - // We use lookupLatency here because the request is uncacheable - cpuSidePort->schedTimingResp(pkt, clockEdge(lookupLatency)); + // We use lookupLatency here because the request is uncacheable. + // We pay also for headerDelay that is charged of bus latencies if + // the packet comes from the bus. + Tick time = clockEdge(lookupLatency) + pkt->headerDelay; + // Reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; + cpuSidePort->schedTimingResp(pkt, time); return true; } else if (pkt->isWrite() && !pkt->isRead()) { - // We use forwardLatency here because there is an uncached - // memory write, forwarded to WriteBuffer. It specifies the - // latency to allocate an internal buffer and to schedule an - // event to the queued port. - allocateWriteBuffer(pkt, clockEdge(forwardLatency), true); + // We pay also for headerDelay that is charged of bus latencies if + // the packet comes from the bus. + Tick allocate_wr_buffer_time = clockEdge(forwardLatency) + + pkt->headerDelay; + // Reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; + allocateWriteBuffer(pkt, allocate_wr_buffer_time, true); } else { // We use forwardLatency here because there is an uncached // memory read, allocateded to MSHR queue (it requires the same // time of forwarding to WriteBuffer, in our assumption). It // specifies the latency to allocate an internal buffer and to // schedule an event to the queued port. - allocateUncachedReadBuffer(pkt, clockEdge(forwardLatency), true); + // We pay also for headerDelay that is charged of bus latencies if + // the packet comes from the bus. + Tick allocate_rd_buffer_time = clockEdge(forwardLatency) + + pkt->headerDelay; + // Reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; + allocateUncachedReadBuffer(pkt, allocate_rd_buffer_time, true); } assert(pkt->needsResponse()); // else we should delete it here?? return true; @@ -557,6 +569,20 @@ Cache::recvTimingReq(PacketPtr pkt) // Note that lat is passed by reference here. The function access() calls // accessBlock() which can modify lat value. bool satisfied = access(pkt, blk, lat, writebacks); + // Here we charge the headerDelay that takes into account the latencies + // of the bus, if the packet comes from it. + // The latency charged it is just lat that is the value of lookupLatency + // modified by access() function, or if not just lookupLatency. + // In case of a hit we are neglecting response latency. + // In case of a miss we are neglecting forward latency. + Tick request_time = clockEdge(lat) + pkt->headerDelay; + // Here we condiser forward_time, paying for just forward latency and + // also charging the delay provided by the xbar. + // forward_time is used in allocateWriteBuffer() function, called + // in case of writeback. + Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; + // Here we reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; // track time of availability of next prefetch, if any Tick next_pf_time = MaxTick; @@ -580,13 +606,12 @@ Cache::recvTimingReq(PacketPtr pkt) // @todo: Make someone pay for this pkt->headerDelay = pkt->payloadDelay = 0; - // In this case we are considering lat neglecting - // responseLatency, modelling hit latency just as - // lookupLatency We pass lat by reference to access(), - // which calls accessBlock() function. If it is a hit, - // accessBlock() can modify lat to override the - // lookupLatency value. - cpuSidePort->schedTimingResp(pkt, clockEdge(lat)); + // In this case we are considering request_time that takes + // into account the delay of the xbar, if any, and just + // lat, neglecting responseLatency, modelling hit latency + // just as lookupLatency or or the value of lat overriden + // by access(), that calls accessBlock() function. + cpuSidePort->schedTimingResp(pkt, request_time); } else { /// @todo nominally we should just delete the packet here, /// however, until 4-phase stuff we can't because sending @@ -596,9 +621,6 @@ Cache::recvTimingReq(PacketPtr pkt) } else { // miss - // @todo: Make someone pay for this - pkt->headerDelay = pkt->payloadDelay = 0; - Addr blk_addr = blockAlign(pkt->getAddr()); MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure()); @@ -638,7 +660,9 @@ Cache::recvTimingReq(PacketPtr pkt) // (also keeps valgrind from complaining when debugging settings // print out instruction results) std::memset(pkt->getPtr(), 0xFF, pkt->getSize()); - cpuSidePort->schedTimingResp(pkt, clockEdge(lat)); + // request_time is used here, taking into account lat and the delay + // charged if the packet comes from the xbar. + cpuSidePort->schedTimingResp(pkt, request_time); // If an outstanding request is in progress (we found an // MSHR) this is set to null @@ -659,12 +683,13 @@ Cache::recvTimingReq(PacketPtr pkt) if (mshr->threadNum != 0/*pkt->req->threadId()*/) { mshr->threadNum = -1; } - // We use forwardLatency here because it is the same + // We use forward_time here because it is the same // considering new targets. We have multiple requests for the - // same address here. It pecifies the latency to allocate an + // same address here. It specifies the latency to allocate an // internal buffer and to schedule an event to the queued - // port. - mshr->allocateTarget(pkt, clockEdge(forwardLatency), order++); + // port and also takes into account the additional delay of + // the xbar. + mshr->allocateTarget(pkt, forward_time, order++); if (mshr->getNumTargets() == numTarget) { noTargetMSHR = mshr; setBlocked(Blocked_NoTargets); @@ -695,11 +720,12 @@ Cache::recvTimingReq(PacketPtr pkt) // no-write-allocate or bypass accesses this will have to // be changed. if (pkt->cmd == MemCmd::Writeback) { - // We use forwardLatency here because there is an + // We use forward_time here because there is an // uncached memory write, forwarded to WriteBuffer. It // specifies the latency to allocate an internal buffer and to - // schedule an event to the queued port. - allocateWriteBuffer(pkt, clockEdge(forwardLatency), true); + // schedule an event to the queued port and also takes into + // account the additional delay of the xbar. + allocateWriteBuffer(pkt, forward_time, true); } else { if (blk && blk->isValid()) { // If we have a write miss to a valid block, we @@ -721,13 +747,14 @@ Cache::recvTimingReq(PacketPtr pkt) assert(!blk->isWritable()); blk->status &= ~BlkReadable; } - // Here we are using forwardLatency, modelling the latency of + // Here we are using forward_time, modelling the latency of // a miss (outbound) just as forwardLatency, neglecting the // lookupLatency component. In this case this latency value // specifies the latency to allocate an internal buffer and to // schedule an event to the queued port, when a cacheable miss // is forwarded to MSHR queue. - allocateMissBuffer(pkt, clockEdge(forwardLatency), true); + // We take also into account the additional delay of the xbar. + allocateMissBuffer(pkt, forward_time, true); } if (prefetcher) { @@ -737,7 +764,7 @@ Cache::recvTimingReq(PacketPtr pkt) } } } - // Here we condiser just forward latency. + // Here we condiser just forward_time. if (next_pf_time != MaxTick) requestMemSideBus(Request_PF, std::max(clockEdge(forwardLatency), next_pf_time)); @@ -747,7 +774,7 @@ Cache::recvTimingReq(PacketPtr pkt) // We use forwardLatency here because we are copying writebacks // to write buffer. It specifies the latency to allocate an internal // buffer and to schedule an event to the queued port. - allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true); + allocateWriteBuffer(wbPkt, forward_time, true); writebacks.pop_front(); } @@ -1063,6 +1090,13 @@ Cache::recvTimingResp(PacketPtr pkt) int stats_cmd_idx = initial_tgt->pkt->cmdToIndex(); Tick miss_latency = curTick() - initial_tgt->recvTime; PacketList writebacks; + // We need forward_time here because we have a call of + // allocateWriteBuffer() that need this parameter to specify the + // time to request the bus. In this case we use forward latency + // because there is a writeback. We pay also here for headerDelay + // that is charged of bus latencies if the packet comes from the + // bus. + Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; if (pkt->req->isUncacheable()) { assert(pkt->req->masterId() < system->maxMasters()); @@ -1101,6 +1135,9 @@ Cache::recvTimingResp(PacketPtr pkt) switch (target->source) { case MSHR::Target::FromCPU: Tick completion_time; + // Here we charge on completion_time the delay of the xbar if the + // packet comes from it, charged on headerDelay. + completion_time = pkt->headerDelay; // Software prefetch handling for cache closest to core if (target->pkt->cmd.isSWPrefetch() && isTopLevel) { @@ -1140,13 +1177,12 @@ Cache::recvTimingResp(PacketPtr pkt) transfer_offset += blkSize; } - // If critical word (no offset) return first word time. + // If not critical word (offset) return payloadDelay. // responseLatency is the latency of the return path // from lower level caches/memory to an upper level cache or // the core. - completion_time = clockEdge(responseLatency) + - (transfer_offset ? pkt->payloadDelay : - pkt->headerDelay); + completion_time += clockEdge(responseLatency) + + (transfer_offset ? pkt->payloadDelay : 0); assert(!target->pkt->req->isUncacheable()); @@ -1161,14 +1197,14 @@ Cache::recvTimingResp(PacketPtr pkt) // responseLatency is the latency of the return path // from lower level caches/memory to an upper level cache or // the core. - completion_time = clockEdge(responseLatency) + + completion_time += clockEdge(responseLatency) + pkt->payloadDelay; target->pkt->req->setExtraData(0); } else { // not a cache fill, just forwarding response // responseLatency is the latency of the return path // from lower level cahces/memory to the core. - completion_time = clockEdge(responseLatency) + + completion_time += clockEdge(responseLatency) + pkt->payloadDelay; if (pkt->isRead() && !is_error) { target->pkt->setData(pkt->getConstPtr()); @@ -1188,7 +1224,7 @@ Cache::recvTimingResp(PacketPtr pkt) __func__, target->pkt->cmdString(), target->pkt->getAddr()); } - // reset the bus additional time as it is now accounted for + // Reset the bus additional time as it is now accounted for target->pkt->headerDelay = target->pkt->payloadDelay = 0; cpuSidePort->schedTimingResp(target->pkt, completion_time); break; @@ -1254,6 +1290,8 @@ Cache::recvTimingResp(PacketPtr pkt) requestMemSideBus(Request_PF, next_pf_time); } } + // reset the xbar additional timinig as it is now accounted for + pkt->headerDelay = pkt->payloadDelay = 0; // copy writebacks to write buffer while (!writebacks.empty()) { @@ -1268,8 +1306,7 @@ Cache::recvTimingResp(PacketPtr pkt) // writebacks to write buffer. It specifies the latency to // allocate an internal buffer and to schedule an event to the // queued port. - allocateWriteBuffer(writebackBlk(blk), clockEdge(forwardLatency), - true); + allocateWriteBuffer(writebackBlk(blk), forward_time, true); } blk->invalidate(); } @@ -1546,8 +1583,6 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, assert(req_pkt->isInvalidate() || pkt->sharedAsserted()); pkt->makeTimingResponse(); - // @todo Make someone pay for this - pkt->headerDelay = pkt->payloadDelay = 0; if (pkt->isRead()) { pkt->setDataFromBlock(blk_data, blkSize); } @@ -1563,8 +1598,13 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, } DPRINTF(Cache, "%s created response: %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - // We model a snoop just considering forwardLatency - memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency)); + // Here we condiser forward_time, paying for just forward latency and + // also charging the delay provided by the xbar. + // forward_time is used as send_time in next allocateWriteBuffer(). + Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; + // Here we reset the timing of the packet. + pkt->headerDelay = pkt->payloadDelay = 0; + memSidePort->schedTimingSnoopResp(pkt, forward_time); } template diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc index 8682cbbaf..23eba158d 100644 --- a/src/mem/dram_ctrl.cc +++ b/src/mem/dram_ctrl.cc @@ -877,13 +877,18 @@ DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency) if (needsResponse) { // access already turned the packet into a response assert(pkt->isResponse()); - - // @todo someone should pay for this + // response_time consumes the static latency and is charged also + // with headerDelay that takes into account the delay provided by + // the xbar and also the payloadDelay that takes into account the + // number of data beats. + Tick response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. pkt->headerDelay = pkt->payloadDelay = 0; // queue the packet in the response queue to be sent out after // the static latency has passed - port.schedTimingResp(pkt, curTick() + static_latency); + port.schedTimingResp(pkt, response_time); } else { // @todo the packet is going to be deleted, and the DRAMPacket // is still having a pointer to it diff --git a/src/mem/dramsim2.cc b/src/mem/dramsim2.cc index 7346a4515..be244bd4a 100644 --- a/src/mem/dramsim2.cc +++ b/src/mem/dramsim2.cc @@ -268,8 +268,10 @@ DRAMSim2::accessAndRespond(PacketPtr pkt) if (needsResponse) { // access already turned the packet into a response assert(pkt->isResponse()); - - // @todo someone should pay for this + // Here we pay for xbar additional delay and to process the payload + // of the packet. + Tick time = curTick() + pkt->headerDelay + pkt->payloadDelay; + // Reset the timings of the packet pkt->headerDelay = pkt->payloadDelay = 0; DPRINTF(DRAMSim2, "Queuing response for address %lld\n", @@ -281,7 +283,7 @@ DRAMSim2::accessAndRespond(PacketPtr pkt) // if we are not already waiting for a retry, or are scheduled // to send a response, schedule an event if (!retryResp && !sendResponseEvent.scheduled()) - schedule(sendResponseEvent, curTick()); + schedule(sendResponseEvent, time); } else { // @todo the packet is going to be deleted, and the DRAMPacket // is still having a pointer to it -- 2.30.2