mem: Add Units to mem stats

[gem5.git] / src / mem / coherent_xbar.cc
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc

index c0d8ab03758d1a6dbf434c8143f52de6f54d626f..79ef629abcee6a6e3698bd7d8fd16964a6e16bc4 100644 (file)
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2011-2015 ARM Limited
+ * Copyright (c) 2011-2020 ARM Limited
   * All rights reserved
   *
   * The license below extends only to copyright in the software and shall
@@ -36,10 +36,6 @@
   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Ali Saidi
- *          Andreas Hansson
- *          William Wang
   */
  
  /**
@@ -47,57 +43,64 @@
   * Definition of a crossbar object.
   */
  
-#include "base/misc.hh"
+#include "mem/coherent_xbar.hh"
+
+#include "base/logging.hh"
  #include "base/trace.hh"
  #include "debug/AddrRanges.hh"
  #include "debug/CoherentXBar.hh"
-#include "mem/coherent_xbar.hh"
  #include "sim/system.hh"
  
-CoherentXBar::CoherentXBar(const CoherentXBarParams *p)
-    : BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter),
-      snoopResponseLatency(p->snoop_response_latency),
-      pointOfCoherency(p->point_of_coherency)
+CoherentXBar::CoherentXBar(const CoherentXBarParams &p)
+    : BaseXBar(p), system(p.system), snoopFilter(p.snoop_filter),
+      snoopResponseLatency(p.snoop_response_latency),
+      maxOutstandingSnoopCheck(p.max_outstanding_snoops),
+      maxRoutingTableSizeCheck(p.max_routing_table_size),
+      pointOfCoherency(p.point_of_coherency),
+      pointOfUnification(p.point_of_unification),
+
+      ADD_STAT(snoops, UNIT_COUNT, "Total snoops"),
+      ADD_STAT(snoopTraffic, UNIT_BYTE, "Total snoop traffic"),
+      ADD_STAT(snoopFanout, UNIT_COUNT, "Request fanout histogram")
  {
-    // create the ports based on the size of the master and slave
-    // vector ports, and the presence of the default port, the ports
-    // are enumerated starting from zero
-    for (int i = 0; i < p->port_master_connection_count; ++i) {
-        std::string portName = csprintf("%s.master[%d]", name(), i);
-        MasterPort* bp = new CoherentXBarMasterPort(portName, *this, i);
-        masterPorts.push_back(bp);
+    // create the ports based on the size of the memory-side port and
+    // CPU-side port vector ports, and the presence of the default port,
+    // the ports are enumerated starting from zero
+    for (int i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
+        std::string portName = csprintf("%s.mem_side_port[%d]", name(), i);
+        RequestPort* bp = new CoherentXBarRequestPort(portName, *this, i);
+        memSidePorts.push_back(bp);
          reqLayers.push_back(new ReqLayer(*bp, *this,
-                                         csprintf(".reqLayer%d", i)));
-        snoopLayers.push_back(new SnoopRespLayer(*bp, *this,
-                                                 csprintf(".snoopLayer%d", i)));
+                                         csprintf("reqLayer%d", i)));
+        snoopLayers.push_back(
+                new SnoopRespLayer(*bp, *this, csprintf("snoopLayer%d", i)));
      }
  
-    // see if we have a default slave device connected and if so add
-    // our corresponding master port
-    if (p->port_default_connection_count) {
-        defaultPortID = masterPorts.size();
+    // see if we have a default CPU-side-port device connected and if so add
+    // our corresponding memory-side port
+    if (p.port_default_connection_count) {
+        defaultPortID = memSidePorts.size();
          std::string portName = name() + ".default";
-        MasterPort* bp = new CoherentXBarMasterPort(portName, *this,
-                                                   defaultPortID);
-        masterPorts.push_back(bp);
-        reqLayers.push_back(new ReqLayer(*bp, *this, csprintf(".reqLayer%d",
-                                             defaultPortID)));
+        RequestPort* bp = new CoherentXBarRequestPort(portName, *this,
+                                                    defaultPortID);
+        memSidePorts.push_back(bp);
+        reqLayers.push_back(new ReqLayer(*bp, *this, csprintf("reqLayer%d",
+                                         defaultPortID)));
          snoopLayers.push_back(new SnoopRespLayer(*bp, *this,
-                                                 csprintf(".snoopLayer%d",
+                                                 csprintf("snoopLayer%d",
                                                            defaultPortID)));
      }
  
-    // create the slave ports, once again starting at zero
-    for (int i = 0; i < p->port_slave_connection_count; ++i) {
-        std::string portName = csprintf("%s.slave[%d]", name(), i);
-        QueuedSlavePort* bp = new CoherentXBarSlavePort(portName, *this, i);
-        slavePorts.push_back(bp);
+    // create the CPU-side ports, once again starting at zero
+    for (int i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
+        std::string portName = csprintf("%s.cpu_side_port[%d]", name(), i);
+        QueuedResponsePort* bp = new CoherentXBarResponsePort(portName,
+                                                            *this, i);
+        cpuSidePorts.push_back(bp);
          respLayers.push_back(new RespLayer(*bp, *this,
-                                           csprintf(".respLayer%d", i)));
+                                           csprintf("respLayer%d", i)));
          snoopRespPorts.push_back(new SnoopRespPort(*bp, *this));
      }
-
-    clearPortCache();
  }
  
  CoherentXBar::~CoherentXBar()
@@ -117,13 +120,13 @@ CoherentXBar::init()
  {
      BaseXBar::init();
  
-    // iterate over our slave ports and determine which of our
-    // neighbouring master ports are snooping and add them as snoopers
-    for (const auto& p: slavePorts) {
-        // check if the connected master port is snooping
+    // iterate over our CPU-side ports and determine which of our
+    // neighbouring memory-side ports are snooping and add them as snoopers
+    for (const auto& p: cpuSidePorts) {
+        // check if the connected memory-side port is snooping
          if (p->isSnooping()) {
-            DPRINTF(AddrRanges, "Adding snooping master %s\n",
-                    p->getMasterPort().name());
+            DPRINTF(AddrRanges, "Adding snooping requestor %s\n",
+                    p->getPeer());
              snoopPorts.push_back(p);
          }
      }
@@ -131,17 +134,17 @@ CoherentXBar::init()
      if (snoopPorts.empty())
          warn("CoherentXBar %s has no snooping ports attached!\n", name());
  
-    // inform the snoop filter about the slave ports so it can create
+    // inform the snoop filter about the CPU-side ports so it can create
      // its own internal representation
      if (snoopFilter)
-        snoopFilter->setSlavePorts(slavePorts);
+        snoopFilter->setCPUSidePorts(cpuSidePorts);
  }
  
  bool
-CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
+CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id)
  {
      // determine the source port based on the id
-    SlavePort *src_port = slavePorts[slave_port_id];
+    ResponsePort *src_port = cpuSidePorts[cpu_side_port_id];
  
      // remember if the packet is an express snoop
      bool is_express_snoop = pkt->isExpressSnoop();
@@ -150,20 +153,20 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
      // and the cache responding flag should always be the same
      assert(is_express_snoop == cache_responding);
  
-    // determine the destination based on the address
-    PortID master_port_id = findPort(pkt->getAddr());
+    // determine the destination based on the destination address range
+    PortID mem_side_port_id = findPort(pkt->getAddrRange());
  
      // test if the crossbar should be considered occupied for the current
      // port, and exclude express snoops from the check
-    if (!is_express_snoop && !reqLayers[master_port_id]->tryTiming(src_port)) {
-        DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x BUSY\n",
-                src_port->name(), pkt->cmdString(), pkt->getAddr());
+    if (!is_express_snoop &&
+        !reqLayers[mem_side_port_id]->tryTiming(src_port)) {
+        DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
+                src_port->name(), pkt->print());
          return false;
      }
  
-    DPRINTF(CoherentXBar, "recvTimingReq: src %s %s expr %d 0x%x\n",
-            src_port->name(), pkt->cmdString(), is_express_snoop,
-            pkt->getAddr());
+    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+            src_port->name(), pkt->print());
  
      // store size and command as they might be modified when
      // forwarding the packet
@@ -180,11 +183,35 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
      calcPacketTiming(pkt, xbar_delay);
  
      // determine how long to be crossbar layer is busy
-    Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
+    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;
  
-    if (!system->bypassCaches()) {
+    // is this the destination point for this packet? (e.g. true if
+    // this xbar is the PoC for a cache maintenance operation to the
+    // PoC) otherwise the destination is any cache that can satisfy
+    // the request
+    const bool is_destination = isDestination(pkt);
+
+    const bool snoop_caches = !system->bypassCaches() &&
+        pkt->cmd != MemCmd::WriteClean;
+    if (snoop_caches) {
          assert(pkt->snoopDelay == 0);
  
+        if (pkt->isClean() && !is_destination) {
+            // before snooping we need to make sure that the memory
+            // below is not busy and the cache clean request can be
+            // forwarded to it
+            if (!memSidePorts[mem_side_port_id]->tryTiming(pkt)) {
+                DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
+                        src_port->name(), pkt->print());
+
+                // update the layer state and schedule an idle event
+                reqLayers[mem_side_port_id]->failedTiming(src_port,
+                                                        clockEdge(Cycles(1)));
+                return false;
+            }
+        }
+
+
          // the packet is a memory-mapped request and should be
          // broadcasted to our snoopers but the source
          if (snoopFilter) {
@@ -194,10 +221,9 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
              // the xbar has to be charged also with to lookup latency
              // of the snoop filter
              pkt->headerDelay += sf_res.second * clockPeriod();
-            DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x"\
-                    " SF size: %i lat: %i\n", src_port->name(),
-                    pkt->cmdString(), pkt->getAddr(), sf_res.first.size(),
-                    sf_res.second);
+            DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
+                    __func__, src_port->name(), pkt->print(),
+                    sf_res.first.size(), sf_res.second);
  
              if (pkt->isEviction()) {
                  // for block-evicting packets, i.e. writebacks and
@@ -208,10 +234,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
                  if (!sf_res.first.empty())
                      pkt->setBlockCached();
              } else {
-                forwardTiming(pkt, slave_port_id, sf_res.first);
+                forwardTiming(pkt, cpu_side_port_id, sf_res.first);
              }
          } else {
-            forwardTiming(pkt, slave_port_id);
+            forwardTiming(pkt, cpu_side_port_id);
          }
  
          // add the snoop delay to our header delay, and then reset it
@@ -236,12 +262,12 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
      // modify the address upon a sendTimingRequest
      const Addr addr(pkt->getAddr());
      if (sink_packet) {
-        DPRINTF(CoherentXBar, "Not forwarding %s to %#llx\n",
-                pkt->cmdString(), pkt->getAddr());
+        DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
+                pkt->print());
      } else {
          // determine if we are forwarding the packet, or responding to
          // it
-        if (!pointOfCoherency || pkt->isRead() || pkt->isWrite()) {
+        if (forwardPacket(pkt)) {
              // if we are passing on, rather than sinking, a packet to
              // which an upstream cache has committed to responding,
              // the line was needs writable, and the responding only
@@ -251,8 +277,15 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
                  pkt->setExpressSnoop();
              }
  
+            // make sure that the write request (e.g., WriteClean)
+            // will stop at the memory below if this crossbar is its
+            // destination
+            if (pkt->isWrite() && is_destination) {
+                pkt->clearWriteThrough();
+            }
+
              // since it is a normal request, attempt to send the packet
-            success = masterPorts[master_port_id]->sendTimingReq(pkt);
+            success = memSidePorts[mem_side_port_id]->sendTimingReq(pkt);
          } else {
              // no need to forward, turn this packet around and respond
              // directly
@@ -264,9 +297,9 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
          }
      }
  
-    if (snoopFilter && !system->bypassCaches()) {
+    if (snoopFilter && snoop_caches) {
          // Let the snoop filter know about the success of the send operation
-        snoopFilter->finishRequest(!success, addr);
+        snoopFilter->finishRequest(!success, addr, pkt->isSecure());
      }
  
      // check if we were successful in sending the packet onwards
@@ -277,11 +310,11 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
          // restore the header delay
          pkt->headerDelay = old_header_delay;
  
-        DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
-                src_port->name(), pkt->cmdString(), pkt->getAddr());
+        DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
+                src_port->name(), pkt->print());
  
          // update the layer state and schedule an idle event
-        reqLayers[master_port_id]->failedTiming(src_port,
+        reqLayers[mem_side_port_id]->failedTiming(src_port,
                                                  clockEdge(Cycles(1)));
      } else {
          // express snoops currently bypass the crossbar state entirely
@@ -295,26 +328,28 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
                  outstandingSnoop.insert(pkt->req);
  
                  // basic sanity check on the outstanding snoops
-                panic_if(outstandingSnoop.size() > 512,
-                         "Outstanding snoop requests exceeded 512\n");
+                panic_if(outstandingSnoop.size() > maxOutstandingSnoopCheck,
+                         "%s: Outstanding snoop requests exceeded %d\n",
+                         name(), maxOutstandingSnoopCheck);
              }
  
              // remember where to route the normal response to
              if (expect_response || expect_snoop_resp) {
                  assert(routeTo.find(pkt->req) == routeTo.end());
-                routeTo[pkt->req] = slave_port_id;
+                routeTo[pkt->req] = cpu_side_port_id;
  
-                panic_if(routeTo.size() > 512,
-                         "Routing table exceeds 512 packets\n");
+                panic_if(routeTo.size() > maxRoutingTableSizeCheck,
+                         "%s: Routing table exceeds %d packets\n",
+                         name(), maxRoutingTableSizeCheck);
              }
  
              // update the layer state and schedule an idle event
-            reqLayers[master_port_id]->succeededTiming(packetFinishTime);
+            reqLayers[mem_side_port_id]->succeededTiming(packetFinishTime);
          }
  
          // stats updates only consider packets that were successfully sent
-        pktCount[slave_port_id][master_port_id]++;
-        pktSize[slave_port_id][master_port_id] += pkt_size;
+        pktCount[cpu_side_port_id][mem_side_port_id]++;
+        pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
          transDist[pkt_cmd]++;
  
          if (is_express_snoop) {
@@ -327,49 +362,105 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
          // queue the packet for deletion
          pendingDelete.reset(pkt);
  
+    // normally we respond to the packet we just received if we need to
+    PacketPtr rsp_pkt = pkt;
+    PortID rsp_port_id = cpu_side_port_id;
+
+    // If this is the destination of the cache clean operation the
+    // crossbar is responsible for responding. This crossbar will
+    // respond when the cache clean is complete. A cache clean
+    // is complete either:
+    // * direcly, if no cache above had a dirty copy of the block
+    //   as indicated by the satisfied flag of the packet, or
+    // * when the crossbar has seen both the cache clean request
+    //   (CleanSharedReq, CleanInvalidReq) and the corresponding
+    //   write (WriteClean) which updates the block in the memory
+    //   below.
+    if (success &&
+        ((pkt->isClean() && pkt->satisfied()) ||
+         pkt->cmd == MemCmd::WriteClean) &&
+        is_destination) {
+        PacketPtr deferred_rsp = pkt->isWrite() ? nullptr : pkt;
+        auto cmo_lookup = outstandingCMO.find(pkt->id);
+        if (cmo_lookup != outstandingCMO.end()) {
+            // the cache clean request has already reached this xbar
+            respond_directly = true;
+            if (pkt->isWrite()) {
+                rsp_pkt = cmo_lookup->second;
+                assert(rsp_pkt);
+
+                // determine the destination
+                const auto route_lookup = routeTo.find(rsp_pkt->req);
+                assert(route_lookup != routeTo.end());
+                rsp_port_id = route_lookup->second;
+                assert(rsp_port_id != InvalidPortID);
+                assert(rsp_port_id < respLayers.size());
+                // remove the request from the routing table
+                routeTo.erase(route_lookup);
+            }
+            outstandingCMO.erase(cmo_lookup);
+        } else {
+            respond_directly = false;
+            outstandingCMO.emplace(pkt->id, deferred_rsp);
+            if (!pkt->isWrite()) {
+                assert(routeTo.find(pkt->req) == routeTo.end());
+                routeTo[pkt->req] = cpu_side_port_id;
+
+                panic_if(routeTo.size() > maxRoutingTableSizeCheck,
+                         "%s: Routing table exceeds %d packets\n",
+                         name(), maxRoutingTableSizeCheck);
+            }
+        }
+    }
+
+
      if (respond_directly) {
-        assert(pkt->needsResponse());
+        assert(rsp_pkt->needsResponse());
          assert(success);
  
-        pkt->makeResponse();
+        rsp_pkt->makeResponse();
  
          if (snoopFilter && !system->bypassCaches()) {
              // let the snoop filter inspect the response and update its state
-            snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]);
+            snoopFilter->updateResponse(rsp_pkt, *cpuSidePorts[rsp_port_id]);
          }
  
+        // we send the response after the current packet, even if the
+        // response is not for this packet (e.g. cache clean operation
+        // where both the request and the write packet have to cross
+        // the destination xbar before the response is sent.)
          Tick response_time = clockEdge() + pkt->headerDelay;
-        pkt->headerDelay = 0;
+        rsp_pkt->headerDelay = 0;
  
-        slavePorts[slave_port_id]->schedTimingResp(pkt, response_time);
+        cpuSidePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time);
      }
  
      return success;
  }
  
  bool
-CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
+CoherentXBar::recvTimingResp(PacketPtr pkt, PortID mem_side_port_id)
  {
      // determine the source port based on the id
-    MasterPort *src_port = masterPorts[master_port_id];
+    RequestPort *src_port = memSidePorts[mem_side_port_id];
  
      // determine the destination
      const auto route_lookup = routeTo.find(pkt->req);
      assert(route_lookup != routeTo.end());
-    const PortID slave_port_id = route_lookup->second;
-    assert(slave_port_id != InvalidPortID);
-    assert(slave_port_id < respLayers.size());
+    const PortID cpu_side_port_id = route_lookup->second;
+    assert(cpu_side_port_id != InvalidPortID);
+    assert(cpu_side_port_id < respLayers.size());
  
      // test if the crossbar should be considered occupied for the
      // current port
-    if (!respLayers[slave_port_id]->tryTiming(src_port)) {
-        DPRINTF(CoherentXBar, "recvTimingResp: src %s %s 0x%x BUSY\n",
-                src_port->name(), pkt->cmdString(), pkt->getAddr());
+    if (!respLayers[cpu_side_port_id]->tryTiming(src_port)) {
+        DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
+                src_port->name(), pkt->print());
          return false;
      }
  
-    DPRINTF(CoherentXBar, "recvTimingResp: src %s %s 0x%x\n",
-            src_port->name(), pkt->cmdString(), pkt->getAddr());
+    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+            src_port->name(), pkt->print());
  
      // store size and command as they might be modified when
      // forwarding the packet
@@ -383,38 +474,38 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
      calcPacketTiming(pkt, xbar_delay);
  
      // determine how long to be crossbar layer is busy
-    Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
+    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;
  
      if (snoopFilter && !system->bypassCaches()) {
          // let the snoop filter inspect the response and update its state
-        snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]);
+        snoopFilter->updateResponse(pkt, *cpuSidePorts[cpu_side_port_id]);
      }
  
-    // send the packet through the destination slave port and pay for
+    // send the packet through the destination CPU-side port and pay for
      // any outstanding header delay
      Tick latency = pkt->headerDelay;
      pkt->headerDelay = 0;
-    slavePorts[slave_port_id]->schedTimingResp(pkt, curTick() + latency);
+    cpuSidePorts[cpu_side_port_id]->schedTimingResp(pkt, curTick()
+                                        + latency);
  
      // remove the request from the routing table
      routeTo.erase(route_lookup);
  
-    respLayers[slave_port_id]->succeededTiming(packetFinishTime);
+    respLayers[cpu_side_port_id]->succeededTiming(packetFinishTime);
  
      // stats updates
-    pktCount[slave_port_id][master_port_id]++;
-    pktSize[slave_port_id][master_port_id] += pkt_size;
+    pktCount[cpu_side_port_id][mem_side_port_id]++;
+    pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
      transDist[pkt_cmd]++;
  
      return true;
  }
  
  void
-CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id)
+CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID mem_side_port_id)
  {
-    DPRINTF(CoherentXBar, "recvTimingSnoopReq: src %s %s 0x%x\n",
-            masterPorts[master_port_id]->name(), pkt->cmdString(),
-            pkt->getAddr());
+    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+            memSidePorts[mem_side_port_id]->name(), pkt->print());
  
      // update stats here as we know the forwarding will succeed
      unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
@@ -442,10 +533,9 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id)
          // the xbar has to be charged also with to lookup latency
          // of the snoop filter
          pkt->headerDelay += sf_res.second * clockPeriod();
-        DPRINTF(CoherentXBar, "recvTimingSnoopReq: src %s %s 0x%x"\
-                " SF size: %i lat: %i\n", masterPorts[master_port_id]->name(),
-                pkt->cmdString(), pkt->getAddr(), sf_res.first.size(),
-                sf_res.second);
+        DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
+                __func__, memSidePorts[mem_side_port_id]->name(),
+                pkt->print(), sf_res.first.size(), sf_res.second);
  
          // forward to all snoopers
          forwardTiming(pkt, InvalidPortID, sf_res.first);
@@ -460,22 +550,22 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID master_port_id)
      // if we can expect a response, remember how to route it
      if (!cache_responding && pkt->cacheResponding()) {
          assert(routeTo.find(pkt->req) == routeTo.end());
-        routeTo[pkt->req] = master_port_id;
+        routeTo[pkt->req] = mem_side_port_id;
      }
  
-    // a snoop request came from a connected slave device (one of
-    // our master ports), and if it is not coming from the slave
+    // a snoop request came from a connected CPU-side-port device (one of
+    // our memory-side ports), and if it is not coming from the CPU-side-port
      // device responsible for the address range something is
      // wrong, hence there is nothing further to do as the packet
      // would be going back to where it came from
-    assert(master_port_id == findPort(pkt->getAddr()));
+    assert(findPort(pkt->getAddrRange()) == mem_side_port_id);
  }
  
  bool
-CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
+CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID cpu_side_port_id)
  {
      // determine the source port based on the id
-    SlavePort* src_port = slavePorts[slave_port_id];
+    ResponsePort* src_port = cpuSidePorts[cpu_side_port_id];
  
      // get the destination
      const auto route_lookup = routeTo.find(pkt->req);
@@ -497,23 +587,23 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
      if (forwardAsSnoop) {
          assert(dest_port_id < snoopLayers.size());
          if (!snoopLayers[dest_port_id]->tryTiming(src_port)) {
-            DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x BUSY\n",
-                    src_port->name(), pkt->cmdString(), pkt->getAddr());
+            DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
+                    src_port->name(), pkt->print());
              return false;
          }
      } else {
-        // get the master port that mirrors this slave port internally
-        MasterPort* snoop_port = snoopRespPorts[slave_port_id];
+        // get the memory-side port that mirrors this CPU-side port internally
+        RequestPort* snoop_port = snoopRespPorts[cpu_side_port_id];
          assert(dest_port_id < respLayers.size());
          if (!respLayers[dest_port_id]->tryTiming(snoop_port)) {
-            DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x BUSY\n",
-                    snoop_port->name(), pkt->cmdString(), pkt->getAddr());
+            DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
+                    snoop_port->name(), pkt->print());
              return false;
          }
      }
  
-    DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x\n",
-            src_port->name(), pkt->cmdString(), pkt->getAddr());
+    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+            src_port->name(), pkt->print());
  
      // store size and command as they might be modified when
      // forwarding the packet
@@ -533,7 +623,7 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
      calcPacketTiming(pkt, xbar_delay);
  
      // determine how long to be crossbar layer is busy
-    Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
+    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;
  
      // forward it either as a snoop response or a normal response
      if (forwardAsSnoop) {
@@ -543,45 +633,48 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
  
          if (snoopFilter) {
              // update the probe filter so that it can properly track the line
-            snoopFilter->updateSnoopForward(pkt, *slavePorts[slave_port_id],
-                                            *masterPorts[dest_port_id]);
+            snoopFilter->updateSnoopForward(pkt,
+                            *cpuSidePorts[cpu_side_port_id],
+                            *memSidePorts[dest_port_id]);
          }
  
-        bool success M5_VAR_USED =
-            masterPorts[dest_port_id]->sendTimingSnoopResp(pkt);
-        pktCount[slave_port_id][dest_port_id]++;
-        pktSize[slave_port_id][dest_port_id] += pkt_size;
+        M5_VAR_USED bool success =
+            memSidePorts[dest_port_id]->sendTimingSnoopResp(pkt);
+        pktCount[cpu_side_port_id][dest_port_id]++;
+        pktSize[cpu_side_port_id][dest_port_id] += pkt_size;
          assert(success);
  
          snoopLayers[dest_port_id]->succeededTiming(packetFinishTime);
      } else {
-        // we got a snoop response on one of our slave ports,
-        // i.e. from a coherent master connected to the crossbar, and
+        // we got a snoop response on one of our CPU-side ports,
+        // i.e. from a coherent requestor connected to the crossbar, and
          // since we created the snoop request as part of recvTiming,
          // this should now be a normal response again
          outstandingSnoop.erase(pkt->req);
  
-        // this is a snoop response from a coherent master, hence it
+        // this is a snoop response from a coherent requestor, hence it
          // should never go back to where the snoop response came from,
          // but instead to where the original request came from
-        assert(slave_port_id != dest_port_id);
+        assert(cpu_side_port_id != dest_port_id);
  
          if (snoopFilter) {
-            // update the probe filter so that it can properly track the line
-            snoopFilter->updateSnoopResponse(pkt, *slavePorts[slave_port_id],
-                                    *slavePorts[dest_port_id]);
+            // update the probe filter so that it can properly track
+            // the line
+            snoopFilter->updateSnoopResponse(pkt,
+                        *cpuSidePorts[cpu_side_port_id],
+                        *cpuSidePorts[dest_port_id]);
          }
  
-        DPRINTF(CoherentXBar, "recvTimingSnoopResp: src %s %s 0x%x"\
-                " FWD RESP\n", src_port->name(), pkt->cmdString(),
-                pkt->getAddr());
+        DPRINTF(CoherentXBar, "%s: src %s packet %s FWD RESP\n", __func__,
+                src_port->name(), pkt->print());
  
-        // as a normal response, it should go back to a master through
-        // one of our slave ports, we also pay for any outstanding
+        // as a normal response, it should go back to a requestor through
+        // one of our CPU-side ports, we also pay for any outstanding
          // header latency
          Tick latency = pkt->headerDelay;
          pkt->headerDelay = 0;
-        slavePorts[dest_port_id]->schedTimingResp(pkt, curTick() + latency);
+        cpuSidePorts[dest_port_id]->schedTimingResp(pkt,
+                                    curTick() + latency);
  
          respLayers[dest_port_id]->succeededTiming(packetFinishTime);
      }
@@ -599,11 +692,10 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
  
  
  void
-CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id,
-                           const std::vector<QueuedSlavePort*>& dests)
+CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_cpu_side_port_id,
+                           const std::vector<QueuedResponsePort*>& dests)
  {
-    DPRINTF(CoherentXBar, "%s for %s address %x size %d\n", __func__,
-            pkt->cmdString(), pkt->getAddr(), pkt->getSize());
+    DPRINTF(CoherentXBar, "%s for %s\n", __func__, pkt->print());
  
      // snoops should only happen if the system isn't bypassing caches
      assert(!system->bypassCaches());
@@ -611,12 +703,12 @@ CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id,
      unsigned fanout = 0;
  
      for (const auto& p: dests) {
-        // we could have gotten this request from a snooping master
-        // (corresponding to our own slave port that is also in
+        // we could have gotten this request from a snooping requestor
+        // (corresponding to our own CPU-side port that is also in
          // snoopPorts) and should not send it back to where it came
          // from
-        if (exclude_slave_port_id == InvalidPortID ||
-            p->getId() != exclude_slave_port_id) {
+        if (exclude_cpu_side_port_id == InvalidPortID ||
+            p->getId() != exclude_cpu_side_port_id) {
              // cache is not allowed to refuse snoop
              p->sendTimingSnoopReq(pkt);
              fanout++;
@@ -628,20 +720,20 @@ CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id,
  }
  
  void
-CoherentXBar::recvReqRetry(PortID master_port_id)
+CoherentXBar::recvReqRetry(PortID mem_side_port_id)
  {
      // responses and snoop responses never block on forwarding them,
      // so the retry will always be coming from a port to which we
      // tried to forward a request
-    reqLayers[master_port_id]->recvRetry();
+    reqLayers[mem_side_port_id]->recvRetry();
  }
  
  Tick
-CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
+CoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
+                                 MemBackdoorPtr *backdoor)
  {
-    DPRINTF(CoherentXBar, "recvAtomic: packet src %s addr 0x%x cmd %s\n",
-            slavePorts[slave_port_id]->name(), pkt->getAddr(),
-            pkt->cmdString());
+    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+            cpuSidePorts[cpu_side_port_id]->name(), pkt->print());
  
      unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
      unsigned int pkt_cmd = pkt->cmdToIndex();
@@ -649,29 +741,47 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
      MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
      Tick snoop_response_latency = 0;
  
-    if (!system->bypassCaches()) {
+    // is this the destination point for this packet? (e.g. true if
+    // this xbar is the PoC for a cache maintenance operation to the
+    // PoC) otherwise the destination is any cache that can satisfy
+    // the request
+    const bool is_destination = isDestination(pkt);
+
+    const bool snoop_caches = !system->bypassCaches() &&
+        pkt->cmd != MemCmd::WriteClean;
+    if (snoop_caches) {
          // forward to all snoopers but the source
          std::pair<MemCmd, Tick> snoop_result;
          if (snoopFilter) {
              // check with the snoop filter where to forward this packet
              auto sf_res =
-                snoopFilter->lookupRequest(pkt, *slavePorts[slave_port_id]);
+                snoopFilter->lookupRequest(pkt,
+                *cpuSidePorts [cpu_side_port_id]);
              snoop_response_latency += sf_res.second * clockPeriod();
-            DPRINTF(CoherentXBar, "%s: src %s %s 0x%x"\
-                    " SF size: %i lat: %i\n", __func__,
-                    slavePorts[slave_port_id]->name(), pkt->cmdString(),
-                    pkt->getAddr(), sf_res.first.size(), sf_res.second);
+            DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
+                    __func__, cpuSidePorts[cpu_side_port_id]->name(),
+                    pkt->print(), sf_res.first.size(), sf_res.second);
  
              // let the snoop filter know about the success of the send
              // operation, and do it even before sending it onwards to
              // avoid situations where atomic upward snoops sneak in
              // between and change the filter state
-            snoopFilter->finishRequest(false, pkt->getAddr());
+            snoopFilter->finishRequest(false, pkt->getAddr(), pkt->isSecure());
  
-            snoop_result = forwardAtomic(pkt, slave_port_id, InvalidPortID,
-                                         sf_res.first);
+            if (pkt->isEviction()) {
+                // for block-evicting packets, i.e. writebacks and
+                // clean evictions, there is no need to snoop up, as
+                // all we do is determine if the block is cached or
+                // not, instead just set it here based on the snoop
+                // filter result
+                if (!sf_res.first.empty())
+                    pkt->setBlockCached();
+            } else {
+                snoop_result = forwardAtomic(pkt, cpu_side_port_id,
+                                            InvalidPortID, sf_res.first);
+            }
          } else {
-            snoop_result = forwardAtomic(pkt, slave_port_id);
+            snoop_result = forwardAtomic(pkt, cpu_side_port_id);
          }
          snoop_response_cmd = snoop_result.first;
          snoop_response_latency += snoop_result.second;
@@ -684,15 +794,25 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
  
      // even if we had a snoop response, we must continue and also
      // perform the actual request at the destination
-    PortID master_port_id = findPort(pkt->getAddr());
+    PortID mem_side_port_id = findPort(pkt->getAddrRange());
  
      if (sink_packet) {
-        DPRINTF(CoherentXBar, "Not forwarding %s to %#llx\n",
-                pkt->cmdString(), pkt->getAddr());
+        DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
+                pkt->print());
      } else {
-        if (!pointOfCoherency || pkt->isRead() || pkt->isWrite()) {
+        if (forwardPacket(pkt)) {
+            // make sure that the write request (e.g., WriteClean)
+            // will stop at the memory below if this crossbar is its
+            // destination
+            if (pkt->isWrite() && is_destination) {
+                pkt->clearWriteThrough();
+            }
+
              // forward the request to the appropriate destination
-            response_latency = masterPorts[master_port_id]->sendAtomic(pkt);
+            auto mem_side_port = memSidePorts[mem_side_port_id];
+            response_latency = backdoor ?
+                mem_side_port->sendAtomicBackdoor(pkt, *backdoor) :
+                mem_side_port->sendAtomic(pkt);
          } else {
              // if it does not need a response we sink the packet above
              assert(pkt->needsResponse());
@@ -702,14 +822,14 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
      }
  
      // stats updates for the request
-    pktCount[slave_port_id][master_port_id]++;
-    pktSize[slave_port_id][master_port_id] += pkt_size;
+    pktCount[cpu_side_port_id][mem_side_port_id]++;
+    pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
      transDist[pkt_cmd]++;
  
  
      // if lower levels have replied, tell the snoop filter
      if (!system->bypassCaches() && snoopFilter && pkt->isResponse()) {
-        snoopFilter->updateResponse(pkt, *slavePorts[slave_port_id]);
+        snoopFilter->updateResponse(pkt, *cpuSidePorts[cpu_side_port_id]);
      }
  
      // if we got a response from a snooper, restore it here
@@ -720,14 +840,38 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
          response_latency = snoop_response_latency;
      }
  
+    // If this is the destination of the cache clean operation the
+    // crossbar is responsible for responding. This crossbar will
+    // respond when the cache clean is complete. An atomic cache clean
+    // is complete when the crossbars receives the cache clean
+    // request (CleanSharedReq, CleanInvalidReq), as either:
+    // * no cache above had a dirty copy of the block as indicated by
+    //   the satisfied flag of the packet, or
+    // * the crossbar has already seen the corresponding write
+    //   (WriteClean) which updates the block in the memory below.
+    if (pkt->isClean() && isDestination(pkt) && pkt->satisfied()) {
+        auto it = outstandingCMO.find(pkt->id);
+        assert(it != outstandingCMO.end());
+        // we are responding right away
+        outstandingCMO.erase(it);
+    } else if (pkt->cmd == MemCmd::WriteClean && isDestination(pkt)) {
+        // if this is the destination of the operation, the xbar
+        // sends the responce to the cache clean operation only
+        // after having encountered the cache clean request
+        M5_VAR_USED auto ret = outstandingCMO.emplace(pkt->id, nullptr);
+        // in atomic mode we know that the WriteClean packet should
+        // precede the clean request
+        assert(ret.second);
+    }
+
      // add the response data
      if (pkt->isResponse()) {
          pkt_size = pkt->hasData() ? pkt->getSize() : 0;
          pkt_cmd = pkt->cmdToIndex();
  
          // stats updates
-        pktCount[slave_port_id][master_port_id]++;
-        pktSize[slave_port_id][master_port_id] += pkt_size;
+        pktCount[cpu_side_port_id][mem_side_port_id]++;
+        pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
          transDist[pkt_cmd]++;
      }
  
@@ -737,11 +881,10 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
  }
  
  Tick
-CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
+CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID mem_side_port_id)
  {
-    DPRINTF(CoherentXBar, "recvAtomicSnoop: packet src %s addr 0x%x cmd %s\n",
-            masterPorts[master_port_id]->name(), pkt->getAddr(),
-            pkt->cmdString());
+    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+            memSidePorts[mem_side_port_id]->name(), pkt->print());
  
      // add the request snoop data
      unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
@@ -754,10 +897,10 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
      if (snoopFilter) {
          auto sf_res = snoopFilter->lookupSnoop(pkt);
          snoop_response_latency += sf_res.second * clockPeriod();
-        DPRINTF(CoherentXBar, "%s: src %s %s 0x%x SF size: %i lat: %i\n",
-                __func__, masterPorts[master_port_id]->name(), pkt->cmdString(),
-                pkt->getAddr(), sf_res.first.size(), sf_res.second);
-        snoop_result = forwardAtomic(pkt, InvalidPortID, master_port_id,
+        DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
+                __func__, memSidePorts[mem_side_port_id]->name(),
+                pkt->print(), sf_res.first.size(), sf_res.second);
+        snoop_result = forwardAtomic(pkt, InvalidPortID, mem_side_port_id,
                                       sf_res.first);
      } else {
          snoop_result = forwardAtomic(pkt, InvalidPortID);
@@ -779,9 +922,9 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
  }
  
  std::pair<MemCmd, Tick>
-CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id,
-                           PortID source_master_port_id,
-                           const std::vector<QueuedSlavePort*>& dests)
+CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_cpu_side_port_id,
+                           PortID source_mem_side_port_id,
+                           const std::vector<QueuedResponsePort*>& dests)
  {
      // the packet may be changed on snoops, record the original
      // command to enable us to restore it between snoops so that
@@ -796,12 +939,12 @@ CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id,
      unsigned fanout = 0;
  
      for (const auto& p: dests) {
-        // we could have gotten this request from a snooping master
-        // (corresponding to our own slave port that is also in
+        // we could have gotten this request from a snooping memory-side port
+        // (corresponding to our own CPU-side port that is also in
          // snoopPorts) and should not send it back to where it came
          // from
-        if (exclude_slave_port_id != InvalidPortID &&
-            p->getId() == exclude_slave_port_id)
+        if (exclude_cpu_side_port_id != InvalidPortID &&
+            p->getId() == exclude_cpu_side_port_id)
              continue;
  
          Tick latency = p->sendAtomicSnoop(pkt);
@@ -825,16 +968,16 @@ CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id,
          if (snoopFilter) {
              // Handle responses by the snoopers and differentiate between
              // responses to requests from above and snoops from below
-            if (source_master_port_id != InvalidPortID) {
+            if (source_mem_side_port_id != InvalidPortID) {
                  // Getting a response for a snoop from below
-                assert(exclude_slave_port_id == InvalidPortID);
+                assert(exclude_cpu_side_port_id == InvalidPortID);
                  snoopFilter->updateSnoopForward(pkt, *p,
-                             *masterPorts[source_master_port_id]);
+                             *memSidePorts[source_mem_side_port_id]);
              } else {
                  // Getting a response for a request from above
-                assert(source_master_port_id == InvalidPortID);
+                assert(source_mem_side_port_id == InvalidPortID);
                  snoopFilter->updateSnoopResponse(pkt, *p,
-                             *slavePorts[exclude_slave_port_id]);
+                             *cpuSidePorts[exclude_cpu_side_port_id]);
              }
          }
          // restore original packet state for remaining snoopers
@@ -850,55 +993,52 @@ CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id,
  }
  
  void
-CoherentXBar::recvFunctional(PacketPtr pkt, PortID slave_port_id)
+CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
  {
      if (!pkt->isPrint()) {
          // don't do DPRINTFs on PrintReq as it clutters up the output
-        DPRINTF(CoherentXBar,
-                "recvFunctional: packet src %s addr 0x%x cmd %s\n",
-                slavePorts[slave_port_id]->name(), pkt->getAddr(),
-                pkt->cmdString());
+        DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+                cpuSidePorts[cpu_side_port_id]->name(), pkt->print());
      }
  
      if (!system->bypassCaches()) {
          // forward to all snoopers but the source
-        forwardFunctional(pkt, slave_port_id);
+        forwardFunctional(pkt, cpu_side_port_id);
      }
  
      // there is no need to continue if the snooping has found what we
      // were looking for and the packet is already a response
      if (!pkt->isResponse()) {
-        // since our slave ports are queued ports we need to check them as well
-        for (const auto& p : slavePorts) {
+        // since our CPU-side ports are queued ports we need to check
+        // them as well
+        for (const auto& p : cpuSidePorts) {
              // if we find a response that has the data, then the
              // downstream caches/memories may be out of date, so simply stop
              // here
-            if (p->checkFunctional(pkt)) {
+            if (p->trySatisfyFunctional(pkt)) {
                  if (pkt->needsResponse())
                      pkt->makeResponse();
                  return;
              }
          }
  
-        PortID dest_id = findPort(pkt->getAddr());
+        PortID dest_id = findPort(pkt->getAddrRange());
  
-        masterPorts[dest_id]->sendFunctional(pkt);
+        memSidePorts[dest_id]->sendFunctional(pkt);
      }
  }
  
  void
-CoherentXBar::recvFunctionalSnoop(PacketPtr pkt, PortID master_port_id)
+CoherentXBar::recvFunctionalSnoop(PacketPtr pkt, PortID mem_side_port_id)
  {
      if (!pkt->isPrint()) {
          // don't do DPRINTFs on PrintReq as it clutters up the output
-        DPRINTF(CoherentXBar,
-                "recvFunctionalSnoop: packet src %s addr 0x%x cmd %s\n",
-                masterPorts[master_port_id]->name(), pkt->getAddr(),
-                pkt->cmdString());
+        DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
+                memSidePorts[mem_side_port_id]->name(), pkt->print());
      }
  
-    for (const auto& p : slavePorts) {
-        if (p->checkFunctional(pkt)) {
+    for (const auto& p : cpuSidePorts) {
+        if (p->trySatisfyFunctional(pkt)) {
              if (pkt->needsResponse())
                  pkt->makeResponse();
              return;
@@ -910,18 +1050,18 @@ CoherentXBar::recvFunctionalSnoop(PacketPtr pkt, PortID master_port_id)
  }
  
  void
-CoherentXBar::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id)
+CoherentXBar::forwardFunctional(PacketPtr pkt, PortID exclude_cpu_side_port_id)
  {
      // snoops should only happen if the system isn't bypassing caches
      assert(!system->bypassCaches());
  
      for (const auto& p: snoopPorts) {
-        // we could have gotten this request from a snooping master
-        // (corresponding to our own slave port that is also in
+        // we could have gotten this request from a snooping requestor
+        // (corresponding to our own CPU-side port that is also in
          // snoopPorts) and should not send it back to where it came
          // from
-        if (exclude_slave_port_id == InvalidPortID ||
-            p->getId() != exclude_slave_port_id)
+        if (exclude_cpu_side_port_id == InvalidPortID ||
+            p->getId() != exclude_cpu_side_port_id)
              p->sendFunctionalSnoop(pkt);
  
          // if we get a response we are done
@@ -955,37 +1095,25 @@ CoherentXBar::sinkPacket(const PacketPtr pkt) const
           (!pkt->needsWritable() || pkt->responderHadWritable()));
  }
  
+bool
+CoherentXBar::forwardPacket(const PacketPtr pkt)
+{
+    // we are forwarding the packet if:
+    // 1) this is a cache clean request to the PoU/PoC and this
+    //    crossbar is above the PoU/PoC
+    // 2) this is a read or a write
+    // 3) this crossbar is above the point of coherency
+    if (pkt->isClean()) {
+        return !isDestination(pkt);
+    }
+    return pkt->isRead() || pkt->isWrite() || !pointOfCoherency;
+}
+
+
  void
  CoherentXBar::regStats()
  {
-    // register the stats of the base class and our layers
      BaseXBar::regStats();
-    for (auto l: reqLayers)
-        l->regStats();
-    for (auto l: respLayers)
-        l->regStats();
-    for (auto l: snoopLayers)
-        l->regStats();
-
-    snoops
-        .name(name() + ".snoops")
-        .desc("Total snoops (count)")
-    ;
-
-    snoopTraffic
-        .name(name() + ".snoopTraffic")
-        .desc("Total snoop traffic (bytes)")
-    ;
-
-    snoopFanout
-        .init(0, snoopPorts.size(), 1)
-        .name(name() + ".snoop_fanout")
-        .desc("Request fanout histogram")
-    ;
-}
  
-CoherentXBar *
-CoherentXBarParams::create()
-{
-    return new CoherentXBar(this);
+    snoopFanout.init(0, snoopPorts.size(), 1);
  }