From: Andreas Hansson Date: Tue, 2 Dec 2014 11:07:54 +0000 (-0500) Subject: mem: Clean up packet data allocation X-Git-Tag: stable_2015_04_15~96 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ea5ccc70417db08379027ca7344e50cba53063dd;p=gem5.git mem: Clean up packet data allocation This patch attempts to make the rules for data allocation in the packet explicit, understandable, and easy to verify. The constructor that copies a packet is extended with an additional flag "alloc_data" to enable the call site to explicitly say whether the newly created packet is short-lived (a zero-time snoop), or has an unknown life-time and therefore should allocate its own data (or copy a static pointer in the case of static data). The tricky case is the static data. In essence this is a copy-avoidance scheme where the original source of the request (DMA, CPU etc) does not ask the memory system to return data as part of the packet, but instead provides a pointer, and then the memory system carries this pointer around, and copies the appropriate data to the location itself. Thus any derived packet actually never copies any data. As the original source does not copy any data from the response packet when arriving back at the source, we must maintain the copy of the original pointer to not break the system. We might want to revisit this one day and pay the price for a few extra memcpy invocations. All in all this patch should make it easier to grok what is going on in the memory system and how data is actually copied (or not). --- diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 296f31ebd..a161f8085 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -487,17 +487,24 @@ Cache::recvTimingReq(PacketPtr pkt) // that have shared copies. Not necessary if we know that // supplier had exclusive copy to begin with. if (pkt->needsExclusive() && !pkt->isSupplyExclusive()) { - Packet *snoopPkt = new Packet(pkt, true); // clear flags + // create a downstream express snoop with cleared packet + // flags, there is no need to allocate any data as the + // packet is merely used to co-ordinate state transitions + Packet *snoop_pkt = new Packet(pkt, true, false); + // also reset the bus time that the original packet has // not yet paid for - snoopPkt->firstWordDelay = snoopPkt->lastWordDelay = 0; - snoopPkt->setExpressSnoop(); - snoopPkt->assertMemInhibit(); - bool M5_VAR_USED success = memSidePort->sendTimingReq(snoopPkt); - // the packet is marked inhibited and will thus bypass any - // flow control + snoop_pkt->firstWordDelay = snoop_pkt->lastWordDelay = 0; + + // make this an instantaneous express snoop, and let the + // other caches in the system know that the packet is + // inhibited + snoop_pkt->setExpressSnoop(); + snoop_pkt->assertMemInhibit(); + bool M5_VAR_USED success = memSidePort->sendTimingReq(snoop_pkt); + // express snoops always succeed assert(success); - // main memory will delete snoopPkt + // main memory will delete the packet } // since we're the official target but we aren't responding, // delete the packet now. @@ -1570,12 +1577,19 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data, { // sanity check assert(req_pkt->isRequest()); + assert(req_pkt->needsResponse()); DPRINTF(Cache, "%s for %s address %x size %d\n", __func__, req_pkt->cmdString(), req_pkt->getAddr(), req_pkt->getSize()); // timing-mode snoop responses require a new packet, unless we // already made a copy... - PacketPtr pkt = already_copied ? req_pkt : new Packet(req_pkt); + PacketPtr pkt = req_pkt; + if (!already_copied) + // do not clear flags, and allocate space for data if the + // packet needs it (the only packets that carry data are read + // responses) + pkt = new Packet(req_pkt, false, req_pkt->isRead()); + assert(req_pkt->isInvalidate() || pkt->sharedAsserted()); pkt->makeTimingResponse(); // @todo Make someone pay for this @@ -1624,7 +1638,7 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, // rewritten to be relative to cpu-side bus (if any) bool alreadyResponded = pkt->memInhibitAsserted(); if (is_timing) { - Packet snoopPkt(pkt, true); // clear flags + Packet snoopPkt(pkt, true, false); // clear flags, no allocation snoopPkt.setExpressSnoop(); snoopPkt.pushSenderState(new ForwardResponseRecord(pkt->getSrc())); // the snoop packet does not need to wait any additional @@ -1996,7 +2010,7 @@ Cache::getTimingPacket() // at the moment. Without this check we could get a stale // copy from memory that might get used in place of the // dirty one. - Packet snoop_pkt(tgt_pkt, true); + Packet snoop_pkt(tgt_pkt, true, false); snoop_pkt.setExpressSnoop(); snoop_pkt.senderState = mshr; cpuSidePort->sendTimingSnoopReq(&snoop_pkt); @@ -2032,7 +2046,7 @@ Cache::getTimingPacket() // not a cache block request, but a response is expected // make copy of current packet to forward, keep current // copy for response handling - pkt = new Packet(tgt_pkt); + pkt = new Packet(tgt_pkt, false, true); if (pkt->isWrite()) { pkt->setData(tgt_pkt->getConstPtr()); } diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc index bc46ed267..e4b62e230 100644 --- a/src/mem/cache/mshr.cc +++ b/src/mem/cache/mshr.cc @@ -366,7 +366,11 @@ MSHR::handleSnoop(PacketPtr pkt, Counter _order) // Actual target device (typ. a memory) will delete the // packet on reception, so we need to save a copy here. - PacketPtr cp_pkt = new Packet(pkt, true); + + // Clear flags and also allocate new data as the original + // packet data storage may have been deleted by the time we + // get to send this packet. + PacketPtr cp_pkt = new Packet(pkt, true, true); targets.add(cp_pkt, curTick(), _order, Target::FromSnoop, downstreamPending && targets.needsExclusive); diff --git a/src/mem/packet.hh b/src/mem/packet.hh index d0b210975..dab1b1b95 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -652,9 +652,9 @@ class Packet : public Printable * less than that of the original packet. In this case the new * packet should allocate its own data. */ - Packet(PacketPtr pkt, bool clearFlags = false) + Packet(PacketPtr pkt, bool clear_flags, bool alloc_data) : cmd(pkt->cmd), req(pkt->req), - data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL), + data(nullptr), addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size), src(pkt->src), dest(pkt->dest), bytesValidStart(pkt->bytesValidStart), @@ -663,16 +663,26 @@ class Packet : public Printable lastWordDelay(pkt->lastWordDelay), senderState(pkt->senderState) { - if (!clearFlags) + if (!clear_flags) flags.set(pkt->flags & COPY_FLAGS); flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE)); - flags.set(pkt->flags & STATIC_DATA); - // if we did not copy the static data pointer, allocate data - // dynamically instead - if (!data) - allocate(); + // should we allocate space for data, or not, the express + // snoops do not need to carry any data as they only serve to + // co-ordinate state changes + if (alloc_data) { + // even if asked to allocate data, if the original packet + // holds static data, then the sender will not be doing + // any memcpy on receiving the response, thus we simply + // carry the pointer forward + if (pkt->flags.isSet(STATIC_DATA)) { + data = pkt->data; + flags.set(STATIC_DATA); + } else { + allocate(); + } + } } /** @@ -789,7 +799,14 @@ class Packet : public Printable /** * Set the data pointer to the following value that should not be - * freed. + * freed. Static data allows us to do a single memcpy even if + * multiple packets are required to get from source to destination + * and back. In essence the pointer is set calling dataStatic on + * the original packet, and whenever this packet is copied and + * forwarded the same pointer is passed on. When a packet + * eventually reaches the destination holding the data, it is + * copied once into the location originally set. On the way back + * to the source, no copies are necessary. */ template void @@ -819,7 +836,15 @@ class Packet : public Printable /** * Set the data pointer to a value that should have delete [] - * called on it. + * called on it. Dynamic data is local to this packet, and as the + * packet travels from source to destination, forwarded packets + * will allocate their own data. When a packet reaches the final + * destination it will populate the dynamic data of that specific + * packet, and on the way back towards the source, memcpy will be + * invoked in every step where a new packet was created e.g. in + * the caches. Ultimately when the response reaches the source a + * final memcpy is needed to extract the data from the packet + * before it is deallocated. */ template void @@ -867,7 +892,14 @@ class Packet : public Printable void setData(const uint8_t *p) { + // we should never be copying data onto itself, which means we + // must idenfity packets with static data, as they carry the + // same pointer from source to destination and back + assert(p != getPtr() || flags.isSet(STATIC_DATA)); + if (p != getPtr()) + // for packet with allocated dynamic data, we copy data from + // one to the other, e.g. a forwarded response to a response std::memcpy(getPtr(), p, getSize()); }