mem: Change warmupCycle stat to warmupTick

src/mem/coherent_xbar.cc

/*
 * Copyright (c) 2011-2020 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Definition of a crossbar object.
 */

#include "mem/coherent_xbar.hh"

#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/AddrRanges.hh"
#include "debug/CoherentXBar.hh"
#include "sim/system.hh"

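// For orientation: a CoherentXBar is normally wired up from a Python
// configuration script rather than directly from C++. A minimal sketch
// (illustrative names; SystemXBar is the CoherentXBar subclass used by the
// standard configs, and cpu/mem_ctrl are assumed to exist):
//
//   xbar = SystemXBar()
//   cpu.icache.mem_side = xbar.cpu_side_ports
//   xbar.mem_side_ports = mem_ctrl.port
//
// The constructor below then creates one request layer and one snoop
// response layer per memory-side port, and one response layer per
// CPU-side port.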
CoherentXBar::CoherentXBar(const CoherentXBarParams &p)
    : BaseXBar(p), system(p.system), snoopFilter(p.snoop_filter),
      snoopResponseLatency(p.snoop_response_latency),
      maxOutstandingSnoopCheck(p.max_outstanding_snoops),
      maxRoutingTableSizeCheck(p.max_routing_table_size),
      pointOfCoherency(p.point_of_coherency),
      pointOfUnification(p.point_of_unification),

      ADD_STAT(snoops, UNIT_COUNT, "Total snoops"),
      ADD_STAT(snoopTraffic, UNIT_BYTE, "Total snoop traffic"),
      ADD_STAT(snoopFanout, UNIT_COUNT, "Request fanout histogram")
{
    // create the ports based on the size of the memory-side port and
    // CPU-side port vector ports, and the presence of the default port;
    // the ports are enumerated starting from zero
    for (int i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        std::string portName = csprintf("%s.mem_side_port[%d]", name(), i);
        RequestPort* bp = new CoherentXBarRequestPort(portName, *this, i);
        memSidePorts.push_back(bp);
        reqLayers.push_back(new ReqLayer(*bp, *this,
                                         csprintf("reqLayer%d", i)));
        snoopLayers.push_back(
            new SnoopRespLayer(*bp, *this, csprintf("snoopLayer%d", i)));
    }

    // see if we have a default CPU-side-port device connected and if so add
    // our corresponding memory-side port
    if (p.port_default_connection_count) {
        defaultPortID = memSidePorts.size();
        std::string portName = name() + ".default";
        RequestPort* bp = new CoherentXBarRequestPort(portName, *this,
                                                      defaultPortID);
        memSidePorts.push_back(bp);
        reqLayers.push_back(new ReqLayer(*bp, *this, csprintf("reqLayer%d",
                                             defaultPortID)));
        snoopLayers.push_back(new SnoopRespLayer(*bp, *this,
                                                 csprintf("snoopLayer%d",
                                                          defaultPortID)));
    }

    // create the CPU-side ports, once again starting at zero
    for (int i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        std::string portName = csprintf("%s.cpu_side_port[%d]", name(), i);
        QueuedResponsePort* bp = new CoherentXBarResponsePort(portName,
                                                              *this, i);
        cpuSidePorts.push_back(bp);
        respLayers.push_back(new RespLayer(*bp, *this,
                                           csprintf("respLayer%d", i)));
        snoopRespPorts.push_back(new SnoopRespPort(*bp, *this));
    }
}

CoherentXBar::~CoherentXBar()
{
    for (auto l: reqLayers)
        delete l;
    for (auto l: respLayers)
        delete l;
    for (auto l: snoopLayers)
        delete l;
    for (auto p: snoopRespPorts)
        delete p;
}

void
CoherentXBar::init()
{
    BaseXBar::init();

    // iterate over our CPU-side ports and determine which of our
    // neighbouring memory-side ports are snooping and add them as snoopers
    for (const auto& p: cpuSidePorts) {
        // check if the connected memory-side port is snooping
        if (p->isSnooping()) {
            DPRINTF(AddrRanges, "Adding snooping requestor %s\n",
                    p->getPeer());
            snoopPorts.push_back(p);
        }
    }

    if (snoopPorts.empty())
        warn("CoherentXBar %s has no snooping ports attached!\n", name());

    // inform the snoop filter about the CPU-side ports so it can create
    // its own internal representation
    if (snoopFilter)
        snoopFilter->setCPUSidePorts(cpuSidePorts);
}

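// A roadmap of the timing-request path below: first check whether the
// request layer for the destination port can accept the packet, then
// (unless caches are bypassed) snoop upwards, guided by the snoop filter
// if present. The packet is then either sunk at this crossbar, forwarded
// downstream, or turned around and answered directly, with cache clean
// operations tracked in outstandingCMO until both the clean request and
// the matching WriteClean have been seen.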
bool
CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id)
{
    // determine the source port based on the id
    ResponsePort *src_port = cpuSidePorts[cpu_side_port_id];

    // remember if the packet is an express snoop
    bool is_express_snoop = pkt->isExpressSnoop();
    bool cache_responding = pkt->cacheResponding();
    // for normal requests, going downstream, the express snoop flag
    // and the cache responding flag should always be the same
    assert(is_express_snoop == cache_responding);

    // determine the destination based on the destination address range
    PortID mem_side_port_id = findPort(pkt->getAddrRange());

    // test if the crossbar should be considered occupied for the current
    // port, and exclude express snoops from the check
    if (!is_express_snoop &&
        !reqLayers[mem_side_port_id]->tryTiming(src_port)) {
        DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                src_port->name(), pkt->print());
        return false;
    }

    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            src_port->name(), pkt->print());

    // store size and command as they might be modified when
    // forwarding the packet
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    // store the old header delay so we can restore it if needed
    Tick old_header_delay = pkt->headerDelay;

    // a request sees the frontend and forward latency
    Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();

    // set the packet header and payload delay
    calcPacketTiming(pkt, xbar_delay);

    // determine how long the crossbar layer is busy
    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;

    // is this the destination point for this packet? (e.g. true if
    // this xbar is the PoC for a cache maintenance operation to the
    // PoC) otherwise the destination is any cache that can satisfy
    // the request
    const bool is_destination = isDestination(pkt);

    const bool snoop_caches = !system->bypassCaches() &&
        pkt->cmd != MemCmd::WriteClean;
    if (snoop_caches) {
        assert(pkt->snoopDelay == 0);

        if (pkt->isClean() && !is_destination) {
            // before snooping we need to make sure that the memory
            // below is not busy and the cache clean request can be
            // forwarded to it
            if (!memSidePorts[mem_side_port_id]->tryTiming(pkt)) {
                DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
                        src_port->name(), pkt->print());

                // update the layer state and schedule an idle event
                reqLayers[mem_side_port_id]->failedTiming(src_port,
                        clockEdge(Cycles(1)));
                return false;
            }
        }

        // the packet is a memory-mapped request and should be
        // broadcast to all our snoopers but the source
        if (snoopFilter) {
            // check with the snoop filter where to forward this packet
            auto sf_res = snoopFilter->lookupRequest(pkt, *src_port);
            // the time required by a packet to be delivered through
            // the xbar must also be charged with the lookup latency
            // of the snoop filter
            pkt->headerDelay += sf_res.second * clockPeriod();
            DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                    __func__, src_port->name(), pkt->print(),
                    sf_res.first.size(), sf_res.second);

            if (pkt->isEviction()) {
                // for block-evicting packets, i.e. writebacks and
                // clean evictions, there is no need to snoop up, as
                // all we do is determine if the block is cached or
                // not, instead just set it here based on the snoop
                // filter result
                if (!sf_res.first.empty())
                    pkt->setBlockCached();
            } else {
                forwardTiming(pkt, cpu_side_port_id, sf_res.first);
            }
        } else {
            forwardTiming(pkt, cpu_side_port_id);
        }

        // add the snoop delay to our header delay, and then reset it
        pkt->headerDelay += pkt->snoopDelay;
        pkt->snoopDelay = 0;
    }

    // set up a sensible starting point
    bool success = true;

    // remember if the packet will generate a snoop response by
    // checking if a cache set the cacheResponding flag during the
    // snooping above
    const bool expect_snoop_resp = !cache_responding && pkt->cacheResponding();
    bool expect_response = pkt->needsResponse() && !pkt->cacheResponding();

    const bool sink_packet = sinkPacket(pkt);

    // in certain cases the crossbar is responsible for responding
    bool respond_directly = false;
    // store the original address as an address mapper could possibly
    // modify the address upon a sendTimingRequest
    const Addr addr(pkt->getAddr());
    if (sink_packet) {
        DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
                pkt->print());
    } else {
        // determine if we are forwarding the packet, or responding to
        // it
        if (forwardPacket(pkt)) {
            // if we are passing on, rather than sinking, a packet to
            // which an upstream cache has committed to responding,
            // the line needs to be writable, and the responder only
            // had an Owned copy, so we need to immediately let the
            // downstream caches know, bypassing any flow control
            if (pkt->cacheResponding()) {
                pkt->setExpressSnoop();
            }

            // make sure that the write request (e.g., WriteClean)
            // will stop at the memory below if this crossbar is its
            // destination
            if (pkt->isWrite() && is_destination) {
                pkt->clearWriteThrough();
            }

            // since it is a normal request, attempt to send the packet
            success = memSidePorts[mem_side_port_id]->sendTimingReq(pkt);
        } else {
            // no need to forward, turn this packet around and respond
            // directly
            assert(pkt->needsResponse());

            respond_directly = true;
            assert(!expect_snoop_resp);
            expect_response = false;
        }
    }

    if (snoopFilter && snoop_caches) {
        // Let the snoop filter know about the success of the send operation
        snoopFilter->finishRequest(!success, addr, pkt->isSecure());
    }

    // check if we were successful in sending the packet onwards
    if (!success) {
        // express snoops should never be forced to retry
        assert(!is_express_snoop);

        // restore the header delay
        pkt->headerDelay = old_header_delay;

        DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
                src_port->name(), pkt->print());

        // update the layer state and schedule an idle event
        reqLayers[mem_side_port_id]->failedTiming(src_port,
                clockEdge(Cycles(1)));
    } else {
        // express snoops currently bypass the crossbar state entirely
        if (!is_express_snoop) {
            // if this particular request will generate a snoop
            // response
            if (expect_snoop_resp) {
                // we should never have an existing request outstanding
                assert(outstandingSnoop.find(pkt->req) ==
                       outstandingSnoop.end());
                outstandingSnoop.insert(pkt->req);

                // basic sanity check on the outstanding snoops
                panic_if(outstandingSnoop.size() > maxOutstandingSnoopCheck,
                         "%s: Outstanding snoop requests exceeded %d\n",
                         name(), maxOutstandingSnoopCheck);
            }

            // remember where to route the normal response to
            if (expect_response || expect_snoop_resp) {
                assert(routeTo.find(pkt->req) == routeTo.end());
                routeTo[pkt->req] = cpu_side_port_id;

                panic_if(routeTo.size() > maxRoutingTableSizeCheck,
                         "%s: Routing table exceeds %d packets\n",
                         name(), maxRoutingTableSizeCheck);
            }

            // update the layer state and schedule an idle event
            reqLayers[mem_side_port_id]->succeededTiming(packetFinishTime);
        }

        // stats updates only consider packets that were successfully sent
        pktCount[cpu_side_port_id][mem_side_port_id]++;
        pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
        transDist[pkt_cmd]++;

        if (is_express_snoop) {
            snoops++;
            snoopTraffic += pkt_size;
        }
    }

    if (sink_packet)
        // queue the packet for deletion
        pendingDelete.reset(pkt);

    // normally we respond to the packet we just received if we need to
    PacketPtr rsp_pkt = pkt;
    PortID rsp_port_id = cpu_side_port_id;

    // If this is the destination of the cache clean operation the
    // crossbar is responsible for responding. This crossbar will
    // respond when the cache clean is complete. A cache clean
    // is complete either:
    // * directly, if no cache above had a dirty copy of the block
    //   as indicated by the satisfied flag of the packet, or
    // * when the crossbar has seen both the cache clean request
    //   (CleanSharedReq, CleanInvalidReq) and the corresponding
    //   write (WriteClean) which updates the block in the memory
    //   below.
    if (success &&
        ((pkt->isClean() && pkt->satisfied()) ||
         pkt->cmd == MemCmd::WriteClean) &&
        is_destination) {
        PacketPtr deferred_rsp = pkt->isWrite() ? nullptr : pkt;
        auto cmo_lookup = outstandingCMO.find(pkt->id);
        if (cmo_lookup != outstandingCMO.end()) {
            // the cache clean request has already reached this xbar
            respond_directly = true;
            if (pkt->isWrite()) {
                rsp_pkt = cmo_lookup->second;
                assert(rsp_pkt);

                // determine the destination
                const auto route_lookup = routeTo.find(rsp_pkt->req);
                assert(route_lookup != routeTo.end());
                rsp_port_id = route_lookup->second;
                assert(rsp_port_id != InvalidPortID);
                assert(rsp_port_id < respLayers.size());
                // remove the request from the routing table
                routeTo.erase(route_lookup);
            }
            outstandingCMO.erase(cmo_lookup);
        } else {
            respond_directly = false;
            outstandingCMO.emplace(pkt->id, deferred_rsp);
            if (!pkt->isWrite()) {
                assert(routeTo.find(pkt->req) == routeTo.end());
                routeTo[pkt->req] = cpu_side_port_id;

                panic_if(routeTo.size() > maxRoutingTableSizeCheck,
                         "%s: Routing table exceeds %d packets\n",
                         name(), maxRoutingTableSizeCheck);
            }
        }
    }

    if (respond_directly) {
        assert(rsp_pkt->needsResponse());
        assert(success);

        rsp_pkt->makeResponse();

        if (snoopFilter && !system->bypassCaches()) {
            // let the snoop filter inspect the response and update its state
            snoopFilter->updateResponse(rsp_pkt, *cpuSidePorts[rsp_port_id]);
        }

        // we send the response after the current packet, even if the
        // response is not for this packet (e.g. cache clean operation
        // where both the request and the write packet have to cross
        // the destination xbar before the response is sent.)
        Tick response_time = clockEdge() + pkt->headerDelay;
        rsp_pkt->headerDelay = 0;

        cpuSidePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time);
    }

    return success;
}

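// Responses are routed back upstream using the routeTo table populated
// when the corresponding request passed through; the entry is removed
// once the response has been scheduled on the CPU-side port.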
bool
CoherentXBar::recvTimingResp(PacketPtr pkt, PortID mem_side_port_id)
{
    // determine the source port based on the id
    RequestPort *src_port = memSidePorts[mem_side_port_id];

    // determine the destination
    const auto route_lookup = routeTo.find(pkt->req);
    assert(route_lookup != routeTo.end());
    const PortID cpu_side_port_id = route_lookup->second;
    assert(cpu_side_port_id != InvalidPortID);
    assert(cpu_side_port_id < respLayers.size());

    // test if the crossbar should be considered occupied for the
    // current port
    if (!respLayers[cpu_side_port_id]->tryTiming(src_port)) {
        DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                src_port->name(), pkt->print());
        return false;
    }

    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            src_port->name(), pkt->print());

    // store size and command as they might be modified when
    // forwarding the packet
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    // a response sees the response latency
    Tick xbar_delay = responseLatency * clockPeriod();

    // set the packet header and payload delay
    calcPacketTiming(pkt, xbar_delay);

    // determine how long the crossbar layer is busy
    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;

    if (snoopFilter && !system->bypassCaches()) {
        // let the snoop filter inspect the response and update its state
        snoopFilter->updateResponse(pkt, *cpuSidePorts[cpu_side_port_id]);
    }

    // send the packet through the destination CPU-side port and pay for
    // any outstanding header delay
    Tick latency = pkt->headerDelay;
    pkt->headerDelay = 0;
    cpuSidePorts[cpu_side_port_id]->schedTimingResp(pkt, curTick()
                                                    + latency);

    // remove the request from the routing table
    routeTo.erase(route_lookup);

    respLayers[cpu_side_port_id]->succeededTiming(packetFinishTime);

    // stats updates
    pktCount[cpu_side_port_id][mem_side_port_id]++;
    pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
    transDist[pkt_cmd]++;

    return true;
}

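// Express snoops are sent by a downstream cache to inform upstream
// caches without being subject to the crossbar's flow control, so
// forwarding cannot fail; this is why the stats are updated before the
// snoop is propagated.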
void
CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID mem_side_port_id)
{
    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            memSidePorts[mem_side_port_id]->name(), pkt->print());

    // update stats here as we know the forwarding will succeed
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    transDist[pkt->cmdToIndex()]++;
    snoops++;
    snoopTraffic += pkt_size;

    // we should only see express snoops from caches
    assert(pkt->isExpressSnoop());

    // set the packet header and payload delay, for now use forward latency
    // @todo Assess the choice of latency further
    calcPacketTiming(pkt, forwardLatency * clockPeriod());

    // remember if a cache has already committed to responding so we
    // can see if it changes during the snooping
    const bool cache_responding = pkt->cacheResponding();

    assert(pkt->snoopDelay == 0);

    if (snoopFilter) {
        // let the Snoop Filter work its magic and guide probing
        auto sf_res = snoopFilter->lookupSnoop(pkt);
        // the time required by a packet to be delivered through
        // the xbar must also be charged with the lookup latency
        // of the snoop filter
        pkt->headerDelay += sf_res.second * clockPeriod();
        DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                __func__, memSidePorts[mem_side_port_id]->name(),
                pkt->print(), sf_res.first.size(), sf_res.second);

        // forward to all snoopers
        forwardTiming(pkt, InvalidPortID, sf_res.first);
    } else {
        forwardTiming(pkt, InvalidPortID);
    }

    // add the snoop delay to our header delay, and then reset it
    pkt->headerDelay += pkt->snoopDelay;
    pkt->snoopDelay = 0;

    // if we can expect a response, remember how to route it
    if (!cache_responding && pkt->cacheResponding()) {
        assert(routeTo.find(pkt->req) == routeTo.end());
        routeTo[pkt->req] = mem_side_port_id;
    }

    // a snoop request comes from a device below us (through one of our
    // memory-side ports), and if it is not coming from the device
    // responsible for the address range something is wrong, hence
    // there is nothing further to do as the packet would be going
    // back to where it came from
    assert(findPort(pkt->getAddrRange()) == mem_side_port_id);
}

bool
CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID cpu_side_port_id)
{
    // determine the source port based on the id
    ResponsePort* src_port = cpuSidePorts[cpu_side_port_id];

    // get the destination
    const auto route_lookup = routeTo.find(pkt->req);
    assert(route_lookup != routeTo.end());
    const PortID dest_port_id = route_lookup->second;
    assert(dest_port_id != InvalidPortID);

    // determine if the response is from a snoop request we
    // created as the result of a normal request (in which case it
    // should be in the outstandingSnoop), or if we merely forwarded
    // someone else's snoop request
    const bool forwardAsSnoop = outstandingSnoop.find(pkt->req) ==
        outstandingSnoop.end();

    // test if the crossbar should be considered occupied for the
    // current port, note that the check is bypassed if the response
    // is being passed on as a normal response since this is occupying
    // the response layer rather than the snoop response layer
    if (forwardAsSnoop) {
        assert(dest_port_id < snoopLayers.size());
        if (!snoopLayers[dest_port_id]->tryTiming(src_port)) {
            DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                    src_port->name(), pkt->print());
            return false;
        }
    } else {
        // get the memory-side port that mirrors this CPU-side port internally
        RequestPort* snoop_port = snoopRespPorts[cpu_side_port_id];
        assert(dest_port_id < respLayers.size());
        if (!respLayers[dest_port_id]->tryTiming(snoop_port)) {
            DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                    snoop_port->name(), pkt->print());
            return false;
        }
    }

    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            src_port->name(), pkt->print());

    // store size and command as they might be modified when
    // forwarding the packet
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    // responses are never express snoops
    assert(!pkt->isExpressSnoop());

    // a snoop response sees the snoop response latency, and if it is
    // forwarded as a normal response, the response latency
    Tick xbar_delay =
        (forwardAsSnoop ? snoopResponseLatency : responseLatency) *
        clockPeriod();

    // set the packet header and payload delay
    calcPacketTiming(pkt, xbar_delay);

    // determine how long the crossbar layer is busy
    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;

    // forward it either as a snoop response or a normal response
    if (forwardAsSnoop) {
        // this is a snoop response to a snoop request we forwarded,
        // e.g. coming from the L1 and going to the L2, and it should
        // be forwarded as a snoop response

        if (snoopFilter) {
            // update the probe filter so that it can properly track the line
            snoopFilter->updateSnoopForward(pkt,
                                            *cpuSidePorts[cpu_side_port_id],
                                            *memSidePorts[dest_port_id]);
        }

        M5_VAR_USED bool success =
            memSidePorts[dest_port_id]->sendTimingSnoopResp(pkt);
        pktCount[cpu_side_port_id][dest_port_id]++;
        pktSize[cpu_side_port_id][dest_port_id] += pkt_size;
        assert(success);

        snoopLayers[dest_port_id]->succeededTiming(packetFinishTime);
    } else {
        // we got a snoop response on one of our CPU-side ports,
        // i.e. from a coherent requestor connected to the crossbar, and
        // since we created the snoop request as part of recvTiming,
        // this should now be a normal response again
        outstandingSnoop.erase(pkt->req);

        // this is a snoop response from a coherent requestor, hence it
        // should never go back to where the snoop response came from,
        // but instead to where the original request came from
        assert(cpu_side_port_id != dest_port_id);

        if (snoopFilter) {
            // update the probe filter so that it can properly track
            // the line
            snoopFilter->updateSnoopResponse(pkt,
                                             *cpuSidePorts[cpu_side_port_id],
                                             *cpuSidePorts[dest_port_id]);
        }

        DPRINTF(CoherentXBar, "%s: src %s packet %s FWD RESP\n", __func__,
                src_port->name(), pkt->print());

        // as a normal response, it should go back to a requestor through
        // one of our CPU-side ports, we also pay for any outstanding
        // header latency
        Tick latency = pkt->headerDelay;
        pkt->headerDelay = 0;
        cpuSidePorts[dest_port_id]->schedTimingResp(pkt,
                                                    curTick() + latency);

        respLayers[dest_port_id]->succeededTiming(packetFinishTime);
    }

    // remove the request from the routing table
    routeTo.erase(route_lookup);

    // stats updates
    transDist[pkt_cmd]++;
    snoops++;
    snoopTraffic += pkt_size;

    return true;
}

void
CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_cpu_side_port_id,
                            const std::vector<QueuedResponsePort*>& dests)
{
    DPRINTF(CoherentXBar, "%s for %s\n", __func__, pkt->print());

    // snoops should only happen if the system isn't bypassing caches
    assert(!system->bypassCaches());

    unsigned fanout = 0;

    for (const auto& p: dests) {
        // we could have gotten this request from a snooping requestor
        // (corresponding to our own CPU-side port that is also in
        // snoopPorts) and should not send it back to where it came
        // from
        if (exclude_cpu_side_port_id == InvalidPortID ||
            p->getId() != exclude_cpu_side_port_id) {
            // cache is not allowed to refuse snoop
            p->sendTimingSnoopReq(pkt);
            fanout++;
        }
    }

    // Stats for fanout of this forward operation
    snoopFanout.sample(fanout);
}

void
CoherentXBar::recvReqRetry(PortID mem_side_port_id)
{
    // responses and snoop responses never block on forwarding them,
    // so the retry will always be coming from a port to which we
    // tried to forward a request
    reqLayers[mem_side_port_id]->recvRetry();
}

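// In atomic mode the entire transaction completes within this one call:
// the upward snoop, the downstream access, and any snoop response all
// happen here, and the returned latency accumulates the snoop filter
// lookup, the snoop itself, and the downstream access.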
Tick
CoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
                                 MemBackdoorPtr *backdoor)
{
    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            cpuSidePorts[cpu_side_port_id]->name(), pkt->print());

    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
    Tick snoop_response_latency = 0;

    // is this the destination point for this packet? (e.g. true if
    // this xbar is the PoC for a cache maintenance operation to the
    // PoC) otherwise the destination is any cache that can satisfy
    // the request
    const bool is_destination = isDestination(pkt);

    const bool snoop_caches = !system->bypassCaches() &&
        pkt->cmd != MemCmd::WriteClean;
    if (snoop_caches) {
        // forward to all snoopers but the source
        std::pair<MemCmd, Tick> snoop_result;
        if (snoopFilter) {
            // check with the snoop filter where to forward this packet
            auto sf_res =
                snoopFilter->lookupRequest(pkt,
                                           *cpuSidePorts[cpu_side_port_id]);
            snoop_response_latency += sf_res.second * clockPeriod();
            DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                    __func__, cpuSidePorts[cpu_side_port_id]->name(),
                    pkt->print(), sf_res.first.size(), sf_res.second);

            // let the snoop filter know about the success of the send
            // operation, and do it even before sending it onwards to
            // avoid situations where atomic upward snoops sneak in
            // between and change the filter state
            snoopFilter->finishRequest(false, pkt->getAddr(), pkt->isSecure());

            if (pkt->isEviction()) {
                // for block-evicting packets, i.e. writebacks and
                // clean evictions, there is no need to snoop up, as
                // all we do is determine if the block is cached or
                // not, instead just set it here based on the snoop
                // filter result
                if (!sf_res.first.empty())
                    pkt->setBlockCached();
            } else {
                snoop_result = forwardAtomic(pkt, cpu_side_port_id,
                                             InvalidPortID, sf_res.first);
            }
        } else {
            snoop_result = forwardAtomic(pkt, cpu_side_port_id);
        }
        snoop_response_cmd = snoop_result.first;
        snoop_response_latency += snoop_result.second;
    }

    // set up a sensible default value
    Tick response_latency = 0;

    const bool sink_packet = sinkPacket(pkt);

    // even if we had a snoop response, we must continue and also
    // perform the actual request at the destination
    PortID mem_side_port_id = findPort(pkt->getAddrRange());

    if (sink_packet) {
        DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
                pkt->print());
    } else {
        if (forwardPacket(pkt)) {
            // make sure that the write request (e.g., WriteClean)
            // will stop at the memory below if this crossbar is its
            // destination
            if (pkt->isWrite() && is_destination) {
                pkt->clearWriteThrough();
            }

            // forward the request to the appropriate destination
            auto mem_side_port = memSidePorts[mem_side_port_id];
            response_latency = backdoor ?
                mem_side_port->sendAtomicBackdoor(pkt, *backdoor) :
                mem_side_port->sendAtomic(pkt);
        } else {
            // if it does not need a response we sink the packet above
            assert(pkt->needsResponse());

            pkt->makeResponse();
        }
    }

    // stats updates for the request
    pktCount[cpu_side_port_id][mem_side_port_id]++;
    pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
    transDist[pkt_cmd]++;

    // if lower levels have replied, tell the snoop filter
    if (!system->bypassCaches() && snoopFilter && pkt->isResponse()) {
        snoopFilter->updateResponse(pkt, *cpuSidePorts[cpu_side_port_id]);
    }

    // if we got a response from a snooper, restore it here
    if (snoop_response_cmd != MemCmd::InvalidCmd) {
        // no one else should have responded
        assert(!pkt->isResponse());
        pkt->cmd = snoop_response_cmd;
        response_latency = snoop_response_latency;
    }

    // If this is the destination of the cache clean operation the
    // crossbar is responsible for responding. This crossbar will
    // respond when the cache clean is complete. An atomic cache clean
    // is complete when the crossbar receives the cache clean
    // request (CleanSharedReq, CleanInvalidReq), as either:
    // * no cache above had a dirty copy of the block as indicated by
    //   the satisfied flag of the packet, or
    // * the crossbar has already seen the corresponding write
    //   (WriteClean) which updates the block in the memory below.
    if (pkt->isClean() && isDestination(pkt) && pkt->satisfied()) {
        auto it = outstandingCMO.find(pkt->id);
        assert(it != outstandingCMO.end());
        // we are responding right away
        outstandingCMO.erase(it);
    } else if (pkt->cmd == MemCmd::WriteClean && isDestination(pkt)) {
        // if this is the destination of the operation, the xbar
        // sends the response to the cache clean operation only
        // after having encountered the cache clean request
        M5_VAR_USED auto ret = outstandingCMO.emplace(pkt->id, nullptr);
        // in atomic mode we know that the WriteClean packet should
        // precede the clean request
        assert(ret.second);
    }

    // add the response data
    if (pkt->isResponse()) {
        pkt_size = pkt->hasData() ? pkt->getSize() : 0;
        pkt_cmd = pkt->cmdToIndex();

        // stats updates
        pktCount[cpu_side_port_id][mem_side_port_id]++;
        pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
        transDist[pkt_cmd]++;
    }

    // @todo: Not setting header time
    pkt->payloadDelay = response_latency;
    return response_latency;
}

Tick
CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID mem_side_port_id)
{
    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            memSidePorts[mem_side_port_id]->name(), pkt->print());

    // add the request snoop data
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    snoops++;
    snoopTraffic += pkt_size;

    // forward to all snoopers
    std::pair<MemCmd, Tick> snoop_result;
    Tick snoop_response_latency = 0;
    if (snoopFilter) {
        auto sf_res = snoopFilter->lookupSnoop(pkt);
        snoop_response_latency += sf_res.second * clockPeriod();
        DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                __func__, memSidePorts[mem_side_port_id]->name(),
                pkt->print(), sf_res.first.size(), sf_res.second);
        snoop_result = forwardAtomic(pkt, InvalidPortID, mem_side_port_id,
                                     sf_res.first);
    } else {
        snoop_result = forwardAtomic(pkt, InvalidPortID);
    }
    MemCmd snoop_response_cmd = snoop_result.first;
    snoop_response_latency += snoop_result.second;

    if (snoop_response_cmd != MemCmd::InvalidCmd)
        pkt->cmd = snoop_response_cmd;

    // add the response snoop data
    if (pkt->isResponse()) {
        snoops++;
    }

    // @todo: Not setting header time
    pkt->payloadDelay = snoop_response_latency;
    return snoop_response_latency;
}

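// Note on the loop below: pkt->cmd is restored to the original command
// after each snooper so that every snooper sees the request unmodified,
// and at most one snooper is expected to turn the packet into a response.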
std::pair<MemCmd, Tick>
CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_cpu_side_port_id,
                            PortID source_mem_side_port_id,
                            const std::vector<QueuedResponsePort*>& dests)
{
    // the packet may be changed on snoops, record the original
    // command to enable us to restore it between snoops so that
    // additional snoops can take place properly
    MemCmd orig_cmd = pkt->cmd;
    MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
    Tick snoop_response_latency = 0;

    // snoops should only happen if the system isn't bypassing caches
    assert(!system->bypassCaches());

    unsigned fanout = 0;

    for (const auto& p: dests) {
        // we could have gotten this request from a snooping memory-side port
        // (corresponding to our own CPU-side port that is also in
        // snoopPorts) and should not send it back to where it came
        // from
        if (exclude_cpu_side_port_id != InvalidPortID &&
            p->getId() == exclude_cpu_side_port_id)
            continue;

        Tick latency = p->sendAtomicSnoop(pkt);
        fanout++;

        // in contrast to a functional access, we have to keep on
        // going as all snoopers must be updated even if we get a
        // response
        if (!pkt->isResponse())
            continue;

        // response from snoop agent
        assert(pkt->cmd != orig_cmd);
        assert(pkt->cacheResponding());
        // should only happen once
        assert(snoop_response_cmd == MemCmd::InvalidCmd);
        // save response state
        snoop_response_cmd = pkt->cmd;
        snoop_response_latency = latency;

        if (snoopFilter) {
            // Handle responses by the snoopers and differentiate between
            // responses to requests from above and snoops from below
            if (source_mem_side_port_id != InvalidPortID) {
                // Getting a response for a snoop from below
                assert(exclude_cpu_side_port_id == InvalidPortID);
                snoopFilter->updateSnoopForward(pkt, *p,
                        *memSidePorts[source_mem_side_port_id]);
            } else {
                // Getting a response for a request from above
                assert(source_mem_side_port_id == InvalidPortID);
                snoopFilter->updateSnoopResponse(pkt, *p,
                        *cpuSidePorts[exclude_cpu_side_port_id]);
            }
        }
        // restore original packet state for remaining snoopers
        pkt->cmd = orig_cmd;
    }

    // Stats for fanout
    snoopFanout.sample(fanout);

    // the packet is restored as part of the loop and any potential
    // snoop response is part of the returned pair
    return std::make_pair(snoop_response_cmd, snoop_response_latency);
}

void
CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
{
    if (!pkt->isPrint()) {
        // don't do DPRINTFs on PrintReq as it clutters up the output
        DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
                cpuSidePorts[cpu_side_port_id]->name(), pkt->print());
    }

    if (!system->bypassCaches()) {
        // forward to all snoopers but the source
        forwardFunctional(pkt, cpu_side_port_id);
    }

    // there is no need to continue if the snooping has found what we
    // were looking for and the packet is already a response
    if (!pkt->isResponse()) {
        // since our CPU-side ports are queued ports we need to check
        // them as well
        for (const auto& p : cpuSidePorts) {
            // if we find a response that has the data, then the
            // downstream caches/memories may be out of date, so simply stop
            // here
            if (p->trySatisfyFunctional(pkt)) {
                if (pkt->needsResponse())
                    pkt->makeResponse();
                return;
            }
        }

        PortID dest_id = findPort(pkt->getAddrRange());

        memSidePorts[dest_id]->sendFunctional(pkt);
    }
}

void
CoherentXBar::recvFunctionalSnoop(PacketPtr pkt, PortID mem_side_port_id)
{
    if (!pkt->isPrint()) {
        // don't do DPRINTFs on PrintReq as it clutters up the output
        DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
                memSidePorts[mem_side_port_id]->name(), pkt->print());
    }

    for (const auto& p : cpuSidePorts) {
        if (p->trySatisfyFunctional(pkt)) {
            if (pkt->needsResponse())
                pkt->makeResponse();
            return;
        }
    }

    // forward to all snoopers
    forwardFunctional(pkt, InvalidPortID);
}

void
CoherentXBar::forwardFunctional(PacketPtr pkt, PortID exclude_cpu_side_port_id)
{
    // snoops should only happen if the system isn't bypassing caches
    assert(!system->bypassCaches());

    for (const auto& p: snoopPorts) {
        // we could have gotten this request from a snooping requestor
        // (corresponding to our own CPU-side port that is also in
        // snoopPorts) and should not send it back to where it came
        // from
        if (exclude_cpu_side_port_id == InvalidPortID ||
            p->getId() != exclude_cpu_side_port_id)
            p->sendFunctionalSnoop(pkt);

        // if we get a response we are done
        if (pkt->isResponse()) {
            break;
        }
    }
}

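// A concrete instance of case 4 below: a ReadExReq snooped upwards where
// a cache holding the block in Modified state has committed to respond.
// That cache can supply a writable copy directly, so there is nothing to
// fetch from below and the crossbar sinks the request.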
bool
CoherentXBar::sinkPacket(const PacketPtr pkt) const
{
    // we can sink the packet if:
    // 1) the crossbar is the point of coherency, and a cache is
    //    responding after being snooped
    // 2) the crossbar is the point of coherency, and the packet is a
    //    coherency packet (not a read or a write) that does not
    //    require a response
    // 3) this is a clean evict or clean writeback, but the packet is
    //    found in a cache above this crossbar
    // 4) a cache is responding after being snooped, and the packet
    //    either does not need the block to be writable, or the cache
    //    that has promised to respond (setting the cache responding
    //    flag) is providing writable and thus had a Modified block,
    //    and no further action is needed
    return (pointOfCoherency && pkt->cacheResponding()) ||
        (pointOfCoherency && !(pkt->isRead() || pkt->isWrite()) &&
         !pkt->needsResponse()) ||
        (pkt->isCleanEviction() && pkt->isBlockCached()) ||
        (pkt->cacheResponding() &&
         (!pkt->needsWritable() || pkt->responderHadWritable()));
}

bool
CoherentXBar::forwardPacket(const PacketPtr pkt)
{
    // we are forwarding the packet if:
    // 1) this is a cache clean request to the PoU/PoC and this
    //    crossbar is above the PoU/PoC
    // 2) this is a read or a write
    // 3) this crossbar is above the point of coherency
    if (pkt->isClean()) {
        return !isDestination(pkt);
    }
    return pkt->isRead() || pkt->isWrite() || !pointOfCoherency;
}
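
// For example, a CleanSharedReq is forwarded by every crossbar above the
// PoC/PoU (isClean() && !isDestination()) and stops being forwarded at the
// crossbar that is its destination, which responds once the operation
// completes (see recvTimingReq above).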

void
CoherentXBar::regStats()
{
    BaseXBar::regStats();

    snoopFanout.init(0, snoopPorts.size(), 1);
}