mem: Change warmupCycle stat to warmupTick

src/mem/coherent_xbar.cc

/*
 * Copyright (c) 2011-2020 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2006 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Definition of a crossbar object.
 */

#include "mem/coherent_xbar.hh"

#include "base/logging.hh"
#include "base/trace.hh"
#include "debug/AddrRanges.hh"
#include "debug/CoherentXBar.hh"
#include "sim/system.hh"

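// For orientation: a CoherentXBar is normally wired up from a Python
// configuration script rather than directly from C++. A minimal sketch
// (illustrative names; SystemXBar is the CoherentXBar subclass used by the
// standard configs, and cpu/mem_ctrl are assumed to exist):
//
//   xbar = SystemXBar()
//   cpu.icache.mem_side = xbar.cpu_side_ports
//   xbar.mem_side_ports = mem_ctrl.port
//
// The constructor below then creates one request layer and one snoop
// response layer per memory-side port, and one response layer per
// CPU-side port.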
CoherentXBar::CoherentXBar(const CoherentXBarParams &p)
    : BaseXBar(p), system(p.system), snoopFilter(p.snoop_filter),
      snoopResponseLatency(p.snoop_response_latency),
      maxOutstandingSnoopCheck(p.max_outstanding_snoops),
      maxRoutingTableSizeCheck(p.max_routing_table_size),
      pointOfCoherency(p.point_of_coherency),
      pointOfUnification(p.point_of_unification),

      ADD_STAT(snoops, UNIT_COUNT, "Total snoops"),
      ADD_STAT(snoopTraffic, UNIT_BYTE, "Total snoop traffic"),
      ADD_STAT(snoopFanout, UNIT_COUNT, "Request fanout histogram")
{
    // create the ports based on the size of the memory-side port and
    // CPU-side port vector ports, and the presence of the default port;
    // the ports are enumerated starting from zero
    for (int i = 0; i < p.port_mem_side_ports_connection_count; ++i) {
        std::string portName = csprintf("%s.mem_side_port[%d]", name(), i);
        RequestPort* bp = new CoherentXBarRequestPort(portName, *this, i);
        memSidePorts.push_back(bp);
        reqLayers.push_back(new ReqLayer(*bp, *this,
                                         csprintf("reqLayer%d", i)));
        snoopLayers.push_back(
            new SnoopRespLayer(*bp, *this, csprintf("snoopLayer%d", i)));
    }

    // see if we have a default CPU-side-port device connected and if so add
    // our corresponding memory-side port
    if (p.port_default_connection_count) {
        defaultPortID = memSidePorts.size();
        std::string portName = name() + ".default";
        RequestPort* bp = new CoherentXBarRequestPort(portName, *this,
                                                      defaultPortID);
        memSidePorts.push_back(bp);
        reqLayers.push_back(new ReqLayer(*bp, *this, csprintf("reqLayer%d",
                                             defaultPortID)));
        snoopLayers.push_back(new SnoopRespLayer(*bp, *this,
                                                 csprintf("snoopLayer%d",
                                                          defaultPortID)));
    }

    // create the CPU-side ports, once again starting at zero
    for (int i = 0; i < p.port_cpu_side_ports_connection_count; ++i) {
        std::string portName = csprintf("%s.cpu_side_port[%d]", name(), i);
        QueuedResponsePort* bp = new CoherentXBarResponsePort(portName,
                                                              *this, i);
        cpuSidePorts.push_back(bp);
        respLayers.push_back(new RespLayer(*bp, *this,
                                           csprintf("respLayer%d", i)));
        snoopRespPorts.push_back(new SnoopRespPort(*bp, *this));
    }
}

CoherentXBar::~CoherentXBar()
{
    for (auto l: reqLayers)
        delete l;
    for (auto l: respLayers)
        delete l;
    for (auto l: snoopLayers)
        delete l;
    for (auto p: snoopRespPorts)
        delete p;
}

void
CoherentXBar::init()
{
    BaseXBar::init();

    // iterate over our CPU-side ports and determine which of our
    // neighbouring memory-side ports are snooping and add them as snoopers
    for (const auto& p: cpuSidePorts) {
        // check if the connected memory-side port is snooping
        if (p->isSnooping()) {
            DPRINTF(AddrRanges, "Adding snooping requestor %s\n",
                    p->getPeer());
            snoopPorts.push_back(p);
        }
    }

    if (snoopPorts.empty())
        warn("CoherentXBar %s has no snooping ports attached!\n", name());

    // inform the snoop filter about the CPU-side ports so it can create
    // its own internal representation
    if (snoopFilter)
        snoopFilter->setCPUSidePorts(cpuSidePorts);
}

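// A roadmap of the timing-request path below: first check whether the
// request layer for the destination port can accept the packet, then
// (unless caches are bypassed) snoop upwards, guided by the snoop filter
// if present. The packet is then either sunk at this crossbar, forwarded
// downstream, or turned around and answered directly, with cache clean
// operations tracked in outstandingCMO until both the clean request and
// the matching WriteClean have been seen.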
bool
CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id)
{
    // determine the source port based on the id
    ResponsePort *src_port = cpuSidePorts[cpu_side_port_id];

    // remember if the packet is an express snoop
    bool is_express_snoop = pkt->isExpressSnoop();
    bool cache_responding = pkt->cacheResponding();
    // for normal requests, going downstream, the express snoop flag
    // and the cache responding flag should always be the same
    assert(is_express_snoop == cache_responding);

    // determine the destination based on the destination address range
    PortID mem_side_port_id = findPort(pkt->getAddrRange());

    // test if the crossbar should be considered occupied for the current
    // port, and exclude express snoops from the check
    if (!is_express_snoop &&
        !reqLayers[mem_side_port_id]->tryTiming(src_port)) {
        DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                src_port->name(), pkt->print());
        return false;
    }

    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            src_port->name(), pkt->print());

    // store size and command as they might be modified when
    // forwarding the packet
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    // store the old header delay so we can restore it if needed
    Tick old_header_delay = pkt->headerDelay;

    // a request sees the frontend and forward latency
    Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();

    // set the packet header and payload delay
    calcPacketTiming(pkt, xbar_delay);

    // determine how long the crossbar layer is busy
    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;

    // is this the destination point for this packet? (e.g. true if
    // this xbar is the PoC for a cache maintenance operation to the
    // PoC) otherwise the destination is any cache that can satisfy
    // the request
    const bool is_destination = isDestination(pkt);

    const bool snoop_caches = !system->bypassCaches() &&
        pkt->cmd != MemCmd::WriteClean;
    if (snoop_caches) {
        assert(pkt->snoopDelay == 0);

        if (pkt->isClean() && !is_destination) {
            // before snooping we need to make sure that the memory
            // below is not busy and the cache clean request can be
            // forwarded to it
            if (!memSidePorts[mem_side_port_id]->tryTiming(pkt)) {
                DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
                        src_port->name(), pkt->print());

                // update the layer state and schedule an idle event
                reqLayers[mem_side_port_id]->failedTiming(src_port,
                        clockEdge(Cycles(1)));
                return false;
            }
        }

        // the packet is a memory-mapped request and should be
        // broadcast to all our snoopers but the source
        if (snoopFilter) {
            // check with the snoop filter where to forward this packet
            auto sf_res = snoopFilter->lookupRequest(pkt, *src_port);
            // the time required by a packet to be delivered through
            // the xbar must also be charged with the lookup latency
            // of the snoop filter
            pkt->headerDelay += sf_res.second * clockPeriod();
            DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                    __func__, src_port->name(), pkt->print(),
                    sf_res.first.size(), sf_res.second);

            if (pkt->isEviction()) {
                // for block-evicting packets, i.e. writebacks and
                // clean evictions, there is no need to snoop up, as
                // all we do is determine if the block is cached or
                // not, instead just set it here based on the snoop
                // filter result
                if (!sf_res.first.empty())
                    pkt->setBlockCached();
            } else {
                forwardTiming(pkt, cpu_side_port_id, sf_res.first);
            }
        } else {
            forwardTiming(pkt, cpu_side_port_id);
        }

        // add the snoop delay to our header delay, and then reset it
        pkt->headerDelay += pkt->snoopDelay;
        pkt->snoopDelay = 0;
    }

    // set up a sensible starting point
    bool success = true;

    // remember if the packet will generate a snoop response by
    // checking if a cache set the cacheResponding flag during the
    // snooping above
    const bool expect_snoop_resp = !cache_responding && pkt->cacheResponding();
    bool expect_response = pkt->needsResponse() && !pkt->cacheResponding();

    const bool sink_packet = sinkPacket(pkt);

    // in certain cases the crossbar is responsible for responding
    bool respond_directly = false;
    // store the original address as an address mapper could possibly
    // modify the address upon a sendTimingRequest
    const Addr addr(pkt->getAddr());
    if (sink_packet) {
        DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
                pkt->print());
    } else {
        // determine if we are forwarding the packet, or responding to
        // it
        if (forwardPacket(pkt)) {
            // if we are passing on, rather than sinking, a packet to
            // which an upstream cache has committed to responding,
            // the line needs to be writable, and the responder only
            // had an Owned copy, so we need to immediately let the
            // downstream caches know, bypassing any flow control
            if (pkt->cacheResponding()) {
                pkt->setExpressSnoop();
            }

            // make sure that the write request (e.g., WriteClean)
            // will stop at the memory below if this crossbar is its
            // destination
            if (pkt->isWrite() && is_destination) {
                pkt->clearWriteThrough();
            }

            // since it is a normal request, attempt to send the packet
            success = memSidePorts[mem_side_port_id]->sendTimingReq(pkt);
        } else {
            // no need to forward, turn this packet around and respond
            // directly
            assert(pkt->needsResponse());

            respond_directly = true;
            assert(!expect_snoop_resp);
            expect_response = false;
        }
    }

    if (snoopFilter && snoop_caches) {
        // Let the snoop filter know about the success of the send operation
        snoopFilter->finishRequest(!success, addr, pkt->isSecure());
    }

    // check if we were successful in sending the packet onwards
    if (!success) {
        // express snoops should never be forced to retry
        assert(!is_express_snoop);

        // restore the header delay
        pkt->headerDelay = old_header_delay;

        DPRINTF(CoherentXBar, "%s: src %s packet %s RETRY\n", __func__,
                src_port->name(), pkt->print());

        // update the layer state and schedule an idle event
        reqLayers[mem_side_port_id]->failedTiming(src_port,
                clockEdge(Cycles(1)));
    } else {
        // express snoops currently bypass the crossbar state entirely
        if (!is_express_snoop) {
            // if this particular request will generate a snoop
            // response
            if (expect_snoop_resp) {
                // we should never have an existing request outstanding
                assert(outstandingSnoop.find(pkt->req) ==
                       outstandingSnoop.end());
                outstandingSnoop.insert(pkt->req);

                // basic sanity check on the outstanding snoops
                panic_if(outstandingSnoop.size() > maxOutstandingSnoopCheck,
                         "%s: Outstanding snoop requests exceeded %d\n",
                         name(), maxOutstandingSnoopCheck);
            }

            // remember where to route the normal response to
            if (expect_response || expect_snoop_resp) {
                assert(routeTo.find(pkt->req) == routeTo.end());
                routeTo[pkt->req] = cpu_side_port_id;

                panic_if(routeTo.size() > maxRoutingTableSizeCheck,
                         "%s: Routing table exceeds %d packets\n",
                         name(), maxRoutingTableSizeCheck);
            }

            // update the layer state and schedule an idle event
            reqLayers[mem_side_port_id]->succeededTiming(packetFinishTime);
        }

        // stats updates only consider packets that were successfully sent
        pktCount[cpu_side_port_id][mem_side_port_id]++;
        pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
        transDist[pkt_cmd]++;

        if (is_express_snoop) {
            snoops++;
            snoopTraffic += pkt_size;
        }
    }

    if (sink_packet)
        // queue the packet for deletion
        pendingDelete.reset(pkt);

    // normally we respond to the packet we just received if we need to
    PacketPtr rsp_pkt = pkt;
    PortID rsp_port_id = cpu_side_port_id;

    // If this is the destination of the cache clean operation the
    // crossbar is responsible for responding. This crossbar will
    // respond when the cache clean is complete. A cache clean
    // is complete either:
    // * directly, if no cache above had a dirty copy of the block
    //   as indicated by the satisfied flag of the packet, or
    // * when the crossbar has seen both the cache clean request
    //   (CleanSharedReq, CleanInvalidReq) and the corresponding
    //   write (WriteClean) which updates the block in the memory
    //   below.
    if (success &&
        ((pkt->isClean() && pkt->satisfied()) ||
         pkt->cmd == MemCmd::WriteClean) &&
        is_destination) {
        PacketPtr deferred_rsp = pkt->isWrite() ? nullptr : pkt;
        auto cmo_lookup = outstandingCMO.find(pkt->id);
        if (cmo_lookup != outstandingCMO.end()) {
            // the cache clean request has already reached this xbar
            respond_directly = true;
            if (pkt->isWrite()) {
                rsp_pkt = cmo_lookup->second;
                assert(rsp_pkt);

                // determine the destination
                const auto route_lookup = routeTo.find(rsp_pkt->req);
                assert(route_lookup != routeTo.end());
                rsp_port_id = route_lookup->second;
                assert(rsp_port_id != InvalidPortID);
                assert(rsp_port_id < respLayers.size());
                // remove the request from the routing table
                routeTo.erase(route_lookup);
            }
            outstandingCMO.erase(cmo_lookup);
        } else {
            respond_directly = false;
            outstandingCMO.emplace(pkt->id, deferred_rsp);
            if (!pkt->isWrite()) {
                assert(routeTo.find(pkt->req) == routeTo.end());
                routeTo[pkt->req] = cpu_side_port_id;

                panic_if(routeTo.size() > maxRoutingTableSizeCheck,
                         "%s: Routing table exceeds %d packets\n",
                         name(), maxRoutingTableSizeCheck);
            }
        }
    }

    if (respond_directly) {
        assert(rsp_pkt->needsResponse());
        assert(success);

        rsp_pkt->makeResponse();

        if (snoopFilter && !system->bypassCaches()) {
            // let the snoop filter inspect the response and update its state
            snoopFilter->updateResponse(rsp_pkt, *cpuSidePorts[rsp_port_id]);
        }

        // we send the response after the current packet, even if the
        // response is not for this packet (e.g. cache clean operation
        // where both the request and the write packet have to cross
        // the destination xbar before the response is sent.)
        Tick response_time = clockEdge() + pkt->headerDelay;
        rsp_pkt->headerDelay = 0;

        cpuSidePorts[rsp_port_id]->schedTimingResp(rsp_pkt, response_time);
    }

    return success;
}

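// Responses are routed back upstream using the routeTo table populated
// when the corresponding request passed through; the entry is removed
// once the response has been scheduled on the CPU-side port.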
bool
CoherentXBar::recvTimingResp(PacketPtr pkt, PortID mem_side_port_id)
{
    // determine the source port based on the id
    RequestPort *src_port = memSidePorts[mem_side_port_id];

    // determine the destination
    const auto route_lookup = routeTo.find(pkt->req);
    assert(route_lookup != routeTo.end());
    const PortID cpu_side_port_id = route_lookup->second;
    assert(cpu_side_port_id != InvalidPortID);
    assert(cpu_side_port_id < respLayers.size());

    // test if the crossbar should be considered occupied for the
    // current port
    if (!respLayers[cpu_side_port_id]->tryTiming(src_port)) {
        DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                src_port->name(), pkt->print());
        return false;
    }

    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            src_port->name(), pkt->print());

    // store size and command as they might be modified when
    // forwarding the packet
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    // a response sees the response latency
    Tick xbar_delay = responseLatency * clockPeriod();

    // set the packet header and payload delay
    calcPacketTiming(pkt, xbar_delay);

    // determine how long the crossbar layer is busy
    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;

    if (snoopFilter && !system->bypassCaches()) {
        // let the snoop filter inspect the response and update its state
        snoopFilter->updateResponse(pkt, *cpuSidePorts[cpu_side_port_id]);
    }

    // send the packet through the destination CPU-side port and pay for
    // any outstanding header delay
    Tick latency = pkt->headerDelay;
    pkt->headerDelay = 0;
    cpuSidePorts[cpu_side_port_id]->schedTimingResp(pkt, curTick()
                                                    + latency);

    // remove the request from the routing table
    routeTo.erase(route_lookup);

    respLayers[cpu_side_port_id]->succeededTiming(packetFinishTime);

    // stats updates
    pktCount[cpu_side_port_id][mem_side_port_id]++;
    pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
    transDist[pkt_cmd]++;

    return true;
}

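// Express snoops are sent by a downstream cache to inform upstream
// caches without being subject to the crossbar's flow control, so
// forwarding cannot fail; this is why the stats are updated before the
// snoop is propagated.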
void
CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID mem_side_port_id)
{
    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            memSidePorts[mem_side_port_id]->name(), pkt->print());

    // update stats here as we know the forwarding will succeed
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    transDist[pkt->cmdToIndex()]++;
    snoops++;
    snoopTraffic += pkt_size;

    // we should only see express snoops from caches
    assert(pkt->isExpressSnoop());

    // set the packet header and payload delay, for now use forward latency
    // @todo Assess the choice of latency further
    calcPacketTiming(pkt, forwardLatency * clockPeriod());

    // remember if a cache has already committed to responding so we
    // can see if it changes during the snooping
    const bool cache_responding = pkt->cacheResponding();

    assert(pkt->snoopDelay == 0);

    if (snoopFilter) {
        // let the Snoop Filter work its magic and guide probing
        auto sf_res = snoopFilter->lookupSnoop(pkt);
        // the time required by a packet to be delivered through
        // the xbar must also be charged with the lookup latency
        // of the snoop filter
        pkt->headerDelay += sf_res.second * clockPeriod();
        DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                __func__, memSidePorts[mem_side_port_id]->name(),
                pkt->print(), sf_res.first.size(), sf_res.second);

        // forward to all snoopers
        forwardTiming(pkt, InvalidPortID, sf_res.first);
    } else {
        forwardTiming(pkt, InvalidPortID);
    }

    // add the snoop delay to our header delay, and then reset it
    pkt->headerDelay += pkt->snoopDelay;
    pkt->snoopDelay = 0;

    // if we can expect a response, remember how to route it
    if (!cache_responding && pkt->cacheResponding()) {
        assert(routeTo.find(pkt->req) == routeTo.end());
        routeTo[pkt->req] = mem_side_port_id;
    }

    // a snoop request comes from a device below us (through one of our
    // memory-side ports), and if it is not coming from the device
    // responsible for the address range something is wrong, hence
    // there is nothing further to do as the packet would be going
    // back to where it came from
    assert(findPort(pkt->getAddrRange()) == mem_side_port_id);
}

bool
CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID cpu_side_port_id)
{
    // determine the source port based on the id
    ResponsePort* src_port = cpuSidePorts[cpu_side_port_id];

    // get the destination
    const auto route_lookup = routeTo.find(pkt->req);
    assert(route_lookup != routeTo.end());
    const PortID dest_port_id = route_lookup->second;
    assert(dest_port_id != InvalidPortID);

    // determine if the response is from a snoop request we
    // created as the result of a normal request (in which case it
    // should be in the outstandingSnoop), or if we merely forwarded
    // someone else's snoop request
    const bool forwardAsSnoop = outstandingSnoop.find(pkt->req) ==
        outstandingSnoop.end();

    // test if the crossbar should be considered occupied for the
    // current port, note that the check is bypassed if the response
    // is being passed on as a normal response since this is occupying
    // the response layer rather than the snoop response layer
    if (forwardAsSnoop) {
        assert(dest_port_id < snoopLayers.size());
        if (!snoopLayers[dest_port_id]->tryTiming(src_port)) {
            DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                    src_port->name(), pkt->print());
            return false;
        }
    } else {
        // get the memory-side port that mirrors this CPU-side port internally
        RequestPort* snoop_port = snoopRespPorts[cpu_side_port_id];
        assert(dest_port_id < respLayers.size());
        if (!respLayers[dest_port_id]->tryTiming(snoop_port)) {
            DPRINTF(CoherentXBar, "%s: src %s packet %s BUSY\n", __func__,
                    snoop_port->name(), pkt->print());
            return false;
        }
    }

    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            src_port->name(), pkt->print());

    // store size and command as they might be modified when
    // forwarding the packet
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    // responses are never express snoops
    assert(!pkt->isExpressSnoop());

    // a snoop response sees the snoop response latency, and if it is
    // forwarded as a normal response, the response latency
    Tick xbar_delay =
        (forwardAsSnoop ? snoopResponseLatency : responseLatency) *
        clockPeriod();

    // set the packet header and payload delay
    calcPacketTiming(pkt, xbar_delay);

    // determine how long the crossbar layer is busy
    Tick packetFinishTime = clockEdge(headerLatency) + pkt->payloadDelay;

    // forward it either as a snoop response or a normal response
    if (forwardAsSnoop) {
        // this is a snoop response to a snoop request we forwarded,
        // e.g. coming from the L1 and going to the L2, and it should
        // be forwarded as a snoop response

        if (snoopFilter) {
            // update the probe filter so that it can properly track the line
            snoopFilter->updateSnoopForward(pkt,
                                            *cpuSidePorts[cpu_side_port_id],
                                            *memSidePorts[dest_port_id]);
        }

        M5_VAR_USED bool success =
            memSidePorts[dest_port_id]->sendTimingSnoopResp(pkt);
        pktCount[cpu_side_port_id][dest_port_id]++;
        pktSize[cpu_side_port_id][dest_port_id] += pkt_size;
        assert(success);

        snoopLayers[dest_port_id]->succeededTiming(packetFinishTime);
    } else {
        // we got a snoop response on one of our CPU-side ports,
        // i.e. from a coherent requestor connected to the crossbar, and
        // since we created the snoop request as part of recvTiming,
        // this should now be a normal response again
        outstandingSnoop.erase(pkt->req);

        // this is a snoop response from a coherent requestor, hence it
        // should never go back to where the snoop response came from,
        // but instead to where the original request came from
        assert(cpu_side_port_id != dest_port_id);

        if (snoopFilter) {
            // update the probe filter so that it can properly track
            // the line
            snoopFilter->updateSnoopResponse(pkt,
                                             *cpuSidePorts[cpu_side_port_id],
                                             *cpuSidePorts[dest_port_id]);
        }

        DPRINTF(CoherentXBar, "%s: src %s packet %s FWD RESP\n", __func__,
                src_port->name(), pkt->print());

        // as a normal response, it should go back to a requestor through
        // one of our CPU-side ports, we also pay for any outstanding
        // header latency
        Tick latency = pkt->headerDelay;
        pkt->headerDelay = 0;
        cpuSidePorts[dest_port_id]->schedTimingResp(pkt,
                                                    curTick() + latency);

        respLayers[dest_port_id]->succeededTiming(packetFinishTime);
    }

    // remove the request from the routing table
    routeTo.erase(route_lookup);

    // stats updates
    transDist[pkt_cmd]++;
    snoops++;
    snoopTraffic += pkt_size;

    return true;
}

void
CoherentXBar::forwardTiming(PacketPtr pkt, PortID exclude_cpu_side_port_id,
                            const std::vector<QueuedResponsePort*>& dests)
{
    DPRINTF(CoherentXBar, "%s for %s\n", __func__, pkt->print());

    // snoops should only happen if the system isn't bypassing caches
    assert(!system->bypassCaches());

    unsigned fanout = 0;

    for (const auto& p: dests) {
        // we could have gotten this request from a snooping requestor
        // (corresponding to our own CPU-side port that is also in
        // snoopPorts) and should not send it back to where it came
        // from
        if (exclude_cpu_side_port_id == InvalidPortID ||
            p->getId() != exclude_cpu_side_port_id) {
            // cache is not allowed to refuse snoop
            p->sendTimingSnoopReq(pkt);
            fanout++;
        }
    }

    // Stats for fanout of this forward operation
    snoopFanout.sample(fanout);
}

void
CoherentXBar::recvReqRetry(PortID mem_side_port_id)
{
    // responses and snoop responses never block on forwarding them,
    // so the retry will always be coming from a port to which we
    // tried to forward a request
    reqLayers[mem_side_port_id]->recvRetry();
}

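// In atomic mode the entire transaction completes within this one call:
// the upward snoop, the downstream access, and any snoop response all
// happen here, and the returned latency accumulates the snoop filter
// lookup, the snoop itself, and the downstream access.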
Tick
CoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
                                 MemBackdoorPtr *backdoor)
{
    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            cpuSidePorts[cpu_side_port_id]->name(), pkt->print());

    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    unsigned int pkt_cmd = pkt->cmdToIndex();

    MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
    Tick snoop_response_latency = 0;

    // is this the destination point for this packet? (e.g. true if
    // this xbar is the PoC for a cache maintenance operation to the
    // PoC) otherwise the destination is any cache that can satisfy
    // the request
    const bool is_destination = isDestination(pkt);

    const bool snoop_caches = !system->bypassCaches() &&
        pkt->cmd != MemCmd::WriteClean;
    if (snoop_caches) {
        // forward to all snoopers but the source
        std::pair<MemCmd, Tick> snoop_result;
        if (snoopFilter) {
            // check with the snoop filter where to forward this packet
            auto sf_res =
                snoopFilter->lookupRequest(pkt,
                                           *cpuSidePorts[cpu_side_port_id]);
            snoop_response_latency += sf_res.second * clockPeriod();
            DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                    __func__, cpuSidePorts[cpu_side_port_id]->name(),
                    pkt->print(), sf_res.first.size(), sf_res.second);

            // let the snoop filter know about the success of the send
            // operation, and do it even before sending it onwards to
            // avoid situations where atomic upward snoops sneak in
            // between and change the filter state
            snoopFilter->finishRequest(false, pkt->getAddr(), pkt->isSecure());

            if (pkt->isEviction()) {
                // for block-evicting packets, i.e. writebacks and
                // clean evictions, there is no need to snoop up, as
                // all we do is determine if the block is cached or
                // not, instead just set it here based on the snoop
                // filter result
                if (!sf_res.first.empty())
                    pkt->setBlockCached();
            } else {
                snoop_result = forwardAtomic(pkt, cpu_side_port_id,
                                             InvalidPortID, sf_res.first);
            }
        } else {
            snoop_result = forwardAtomic(pkt, cpu_side_port_id);
        }
        snoop_response_cmd = snoop_result.first;
        snoop_response_latency += snoop_result.second;
    }

    // set up a sensible default value
    Tick response_latency = 0;

    const bool sink_packet = sinkPacket(pkt);

    // even if we had a snoop response, we must continue and also
    // perform the actual request at the destination
    PortID mem_side_port_id = findPort(pkt->getAddrRange());

    if (sink_packet) {
        DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
                pkt->print());
    } else {
        if (forwardPacket(pkt)) {
            // make sure that the write request (e.g., WriteClean)
            // will stop at the memory below if this crossbar is its
            // destination
            if (pkt->isWrite() && is_destination) {
                pkt->clearWriteThrough();
            }

            // forward the request to the appropriate destination
            auto mem_side_port = memSidePorts[mem_side_port_id];
            response_latency = backdoor ?
                mem_side_port->sendAtomicBackdoor(pkt, *backdoor) :
                mem_side_port->sendAtomic(pkt);
        } else {
            // if it does not need a response we sink the packet above
            assert(pkt->needsResponse());

            pkt->makeResponse();
        }
    }

    // stats updates for the request
    pktCount[cpu_side_port_id][mem_side_port_id]++;
    pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
    transDist[pkt_cmd]++;

    // if lower levels have replied, tell the snoop filter
    if (!system->bypassCaches() && snoopFilter && pkt->isResponse()) {
        snoopFilter->updateResponse(pkt, *cpuSidePorts[cpu_side_port_id]);
    }

    // if we got a response from a snooper, restore it here
    if (snoop_response_cmd != MemCmd::InvalidCmd) {
        // no one else should have responded
        assert(!pkt->isResponse());
        pkt->cmd = snoop_response_cmd;
        response_latency = snoop_response_latency;
    }

    // If this is the destination of the cache clean operation the
    // crossbar is responsible for responding. This crossbar will
    // respond when the cache clean is complete. An atomic cache clean
    // is complete when the crossbar receives the cache clean
    // request (CleanSharedReq, CleanInvalidReq), as either:
    // * no cache above had a dirty copy of the block as indicated by
    //   the satisfied flag of the packet, or
    // * the crossbar has already seen the corresponding write
    //   (WriteClean) which updates the block in the memory below.
    if (pkt->isClean() && isDestination(pkt) && pkt->satisfied()) {
        auto it = outstandingCMO.find(pkt->id);
        assert(it != outstandingCMO.end());
        // we are responding right away
        outstandingCMO.erase(it);
    } else if (pkt->cmd == MemCmd::WriteClean && isDestination(pkt)) {
        // if this is the destination of the operation, the xbar
        // sends the response to the cache clean operation only
        // after having encountered the cache clean request
        M5_VAR_USED auto ret = outstandingCMO.emplace(pkt->id, nullptr);
        // in atomic mode we know that the WriteClean packet should
        // precede the clean request
        assert(ret.second);
    }

    // add the response data
    if (pkt->isResponse()) {
        pkt_size = pkt->hasData() ? pkt->getSize() : 0;
        pkt_cmd = pkt->cmdToIndex();

        // stats updates
        pktCount[cpu_side_port_id][mem_side_port_id]++;
        pktSize[cpu_side_port_id][mem_side_port_id] += pkt_size;
        transDist[pkt_cmd]++;
    }

    // @todo: Not setting header time
    pkt->payloadDelay = response_latency;
    return response_latency;
}

Tick
CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID mem_side_port_id)
{
    DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
            memSidePorts[mem_side_port_id]->name(), pkt->print());

    // add the request snoop data
    unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
    snoops++;
    snoopTraffic += pkt_size;

    // forward to all snoopers
    std::pair<MemCmd, Tick> snoop_result;
    Tick snoop_response_latency = 0;
    if (snoopFilter) {
        auto sf_res = snoopFilter->lookupSnoop(pkt);
        snoop_response_latency += sf_res.second * clockPeriod();
        DPRINTF(CoherentXBar, "%s: src %s packet %s SF size: %i lat: %i\n",
                __func__, memSidePorts[mem_side_port_id]->name(),
                pkt->print(), sf_res.first.size(), sf_res.second);
        snoop_result = forwardAtomic(pkt, InvalidPortID, mem_side_port_id,
                                     sf_res.first);
    } else {
        snoop_result = forwardAtomic(pkt, InvalidPortID);
    }
    MemCmd snoop_response_cmd = snoop_result.first;
    snoop_response_latency += snoop_result.second;

    if (snoop_response_cmd != MemCmd::InvalidCmd)
        pkt->cmd = snoop_response_cmd;

    // add the response snoop data
    if (pkt->isResponse()) {
        snoops++;
    }

    // @todo: Not setting header time
    pkt->payloadDelay = snoop_response_latency;
    return snoop_response_latency;
}

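// Note on the loop below: pkt->cmd is restored to the original command
// after each snooper so that every snooper sees the request unmodified,
// and at most one snooper is expected to turn the packet into a response.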
std::pair<MemCmd, Tick>
CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_cpu_side_port_id,
                            PortID source_mem_side_port_id,
                            const std::vector<QueuedResponsePort*>& dests)
{
    // the packet may be changed on snoops, record the original
    // command to enable us to restore it between snoops so that
    // additional snoops can take place properly
    MemCmd orig_cmd = pkt->cmd;
    MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
    Tick snoop_response_latency = 0;

    // snoops should only happen if the system isn't bypassing caches
    assert(!system->bypassCaches());

    unsigned fanout = 0;

    for (const auto& p: dests) {
        // we could have gotten this request from a snooping memory-side port
        // (corresponding to our own CPU-side port that is also in
        // snoopPorts) and should not send it back to where it came
        // from
        if (exclude_cpu_side_port_id != InvalidPortID &&
            p->getId() == exclude_cpu_side_port_id)
            continue;

        Tick latency = p->sendAtomicSnoop(pkt);
        fanout++;

        // in contrast to a functional access, we have to keep on
        // going as all snoopers must be updated even if we get a
        // response
        if (!pkt->isResponse())
            continue;

        // response from snoop agent
        assert(pkt->cmd != orig_cmd);
        assert(pkt->cacheResponding());
        // should only happen once
        assert(snoop_response_cmd == MemCmd::InvalidCmd);
        // save response state
        snoop_response_cmd = pkt->cmd;
        snoop_response_latency = latency;

        if (snoopFilter) {
            // Handle responses by the snoopers and differentiate between
            // responses to requests from above and snoops from below
            if (source_mem_side_port_id != InvalidPortID) {
                // Getting a response for a snoop from below
                assert(exclude_cpu_side_port_id == InvalidPortID);
                snoopFilter->updateSnoopForward(pkt, *p,
                        *memSidePorts[source_mem_side_port_id]);
            } else {
                // Getting a response for a request from above
                assert(source_mem_side_port_id == InvalidPortID);
                snoopFilter->updateSnoopResponse(pkt, *p,
                        *cpuSidePorts[exclude_cpu_side_port_id]);
            }
        }
        // restore original packet state for remaining snoopers
        pkt->cmd = orig_cmd;
    }

    // Stats for fanout
    snoopFanout.sample(fanout);

    // the packet is restored as part of the loop and any potential
    // snoop response is part of the returned pair
    return std::make_pair(snoop_response_cmd, snoop_response_latency);
}

void
CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
{
    if (!pkt->isPrint()) {
        // don't do DPRINTFs on PrintReq as it clutters up the output
        DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
                cpuSidePorts[cpu_side_port_id]->name(), pkt->print());
    }

    if (!system->bypassCaches()) {
        // forward to all snoopers but the source
        forwardFunctional(pkt, cpu_side_port_id);
    }

    // there is no need to continue if the snooping has found what we
    // were looking for and the packet is already a response
    if (!pkt->isResponse()) {
        // since our CPU-side ports are queued ports we need to check
        // them as well
        for (const auto& p : cpuSidePorts) {
            // if we find a response that has the data, then the
            // downstream caches/memories may be out of date, so simply stop
            // here
            if (p->trySatisfyFunctional(pkt)) {
                if (pkt->needsResponse())
                    pkt->makeResponse();
                return;
            }
        }

        PortID dest_id = findPort(pkt->getAddrRange());

        memSidePorts[dest_id]->sendFunctional(pkt);
    }
}

void
CoherentXBar::recvFunctionalSnoop(PacketPtr pkt, PortID mem_side_port_id)
{
    if (!pkt->isPrint()) {
        // don't do DPRINTFs on PrintReq as it clutters up the output
        DPRINTF(CoherentXBar, "%s: src %s packet %s\n", __func__,
                memSidePorts[mem_side_port_id]->name(), pkt->print());
    }

    for (const auto& p : cpuSidePorts) {
        if (p->trySatisfyFunctional(pkt)) {
            if (pkt->needsResponse())
                pkt->makeResponse();
            return;
        }
    }

    // forward to all snoopers
    forwardFunctional(pkt, InvalidPortID);
}

void
CoherentXBar::forwardFunctional(PacketPtr pkt, PortID exclude_cpu_side_port_id)
{
    // snoops should only happen if the system isn't bypassing caches
    assert(!system->bypassCaches());

    for (const auto& p: snoopPorts) {
        // we could have gotten this request from a snooping requestor
        // (corresponding to our own CPU-side port that is also in
        // snoopPorts) and should not send it back to where it came
        // from
        if (exclude_cpu_side_port_id == InvalidPortID ||
            p->getId() != exclude_cpu_side_port_id)
            p->sendFunctionalSnoop(pkt);

        // if we get a response we are done
        if (pkt->isResponse()) {
            break;
        }
    }
}

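// A concrete instance of case 4 below: a ReadExReq snooped upwards where
// a cache holding the block in Modified state has committed to respond.
// That cache can supply a writable copy directly, so there is nothing to
// fetch from below and the crossbar sinks the request.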
bool
CoherentXBar::sinkPacket(const PacketPtr pkt) const
{
    // we can sink the packet if:
    // 1) the crossbar is the point of coherency, and a cache is
    //    responding after being snooped
    // 2) the crossbar is the point of coherency, and the packet is a
    //    coherency packet (not a read or a write) that does not
    //    require a response
    // 3) this is a clean evict or clean writeback, but the packet is
    //    found in a cache above this crossbar
    // 4) a cache is responding after being snooped, and the packet
    //    either does not need the block to be writable, or the cache
    //    that has promised to respond (setting the cache responding
    //    flag) is providing writable and thus had a Modified block,
    //    and no further action is needed
    return (pointOfCoherency && pkt->cacheResponding()) ||
        (pointOfCoherency && !(pkt->isRead() || pkt->isWrite()) &&
         !pkt->needsResponse()) ||
        (pkt->isCleanEviction() && pkt->isBlockCached()) ||
        (pkt->cacheResponding() &&
         (!pkt->needsWritable() || pkt->responderHadWritable()));
}

bool
CoherentXBar::forwardPacket(const PacketPtr pkt)
{
    // we are forwarding the packet if:
    // 1) this is a cache clean request to the PoU/PoC and this
    //    crossbar is above the PoU/PoC
    // 2) this is a read or a write
    // 3) this crossbar is above the point of coherency
    if (pkt->isClean()) {
        return !isDestination(pkt);
    }
    return pkt->isRead() || pkt->isWrite() || !pointOfCoherency;
}
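
// For example, a CleanSharedReq is forwarded by every crossbar above the
// PoC/PoU (isClean() && !isDestination()) and stops being forwarded at the
// crossbar that is its destination, which responds once the operation
// completes (see recvTimingReq above).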

void
CoherentXBar::regStats()
{
    BaseXBar::regStats();

    snoopFanout.init(0, snoopPorts.size(), 1);
}