1 /*
2 * Copyright (c) 2012-2013, 2018-2019 ARM Limited
3 * All rights reserved.
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2003-2005 The Regents of The University of Michigan
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /**
42 * @file
43 * Definition of BaseCache functions.
44 */
45
46 #include "mem/cache/base.hh"
47
48 #include "base/compiler.hh"
49 #include "base/logging.hh"
50 #include "debug/Cache.hh"
51 #include "debug/CacheComp.hh"
52 #include "debug/CachePort.hh"
53 #include "debug/CacheRepl.hh"
54 #include "debug/CacheVerbose.hh"
55 #include "mem/cache/compressors/base.hh"
56 #include "mem/cache/mshr.hh"
57 #include "mem/cache/prefetch/base.hh"
58 #include "mem/cache/queue_entry.hh"
59 #include "mem/cache/tags/compressed_tags.hh"
60 #include "mem/cache/tags/super_blk.hh"
61 #include "params/BaseCache.hh"
62 #include "params/WriteAllocator.hh"
63 #include "sim/core.hh"
64
65 BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name,
66 BaseCache *_cache,
67 const std::string &_label)
68 : QueuedResponsePort(_name, _cache, queue),
69 queue(*_cache, *this, true, _label),
70 blocked(false), mustSendRetry(false),
71 sendRetryEvent([this]{ processSendRetry(); }, _name)
72 {
73 }
74
75 BaseCache::BaseCache(const BaseCacheParams &p, unsigned blk_size)
76 : ClockedObject(p),
77 cpuSidePort (p.name + ".cpu_side_port", this, "CpuSidePort"),
78 memSidePort(p.name + ".mem_side_port", this, "MemSidePort"),
79 mshrQueue("MSHRs", p.mshrs, 0, p.demand_mshr_reserve), // see below
80 writeBuffer("write buffer", p.write_buffers, p.mshrs), // see below
81 tags(p.tags),
82 compressor(p.compressor),
83 prefetcher(p.prefetcher),
84 writeAllocator(p.write_allocator),
85 writebackClean(p.writeback_clean),
86 tempBlockWriteback(nullptr),
87 writebackTempBlockAtomicEvent([this]{ writebackTempBlockAtomic(); },
88 name(), false,
89 EventBase::Delayed_Writeback_Pri),
90 blkSize(blk_size),
91 lookupLatency(p.tag_latency),
92 dataLatency(p.data_latency),
93 forwardLatency(p.tag_latency),
94 fillLatency(p.data_latency),
95 responseLatency(p.response_latency),
96 sequentialAccess(p.sequential_access),
97 numTarget(p.tgts_per_mshr),
98 forwardSnoops(true),
99 clusivity(p.clusivity),
100 isReadOnly(p.is_read_only),
101 replaceExpansions(p.replace_expansions),
102 moveContractions(p.move_contractions),
103 blocked(0),
104 order(0),
105 noTargetMSHR(nullptr),
106 missCount(p.max_miss_count),
107 addrRanges(p.addr_ranges.begin(), p.addr_ranges.end()),
108 system(p.system),
109 stats(*this)
110 {
111 // the MSHR queue has no reserve entries as we check the MSHR
112 // queue on every single allocation, whereas the write queue has
113 // as many reserve entries as we have MSHRs, since every MSHR may
114 // eventually require a writeback, and we do not check the write
115 // buffer before committing to an MSHR
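// Illustrative sizing (hypothetical parameter values): with p.mshrs = 4
// and p.write_buffers = 8, the write buffer above is constructed with 4
// reserve entries, so a writeback triggered by any of the 4 outstanding
// misses is guaranteed a free slot.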
116
117 // forward snoops is overridden in init() once we can query
118 // whether the connected requestor is actually snooping or not
119
120 tempBlock = new TempCacheBlk(blkSize);
121
122 tags->tagsInit();
123 if (prefetcher)
124 prefetcher->setCache(this);
125
126 fatal_if(compressor && !dynamic_cast<CompressedTags*>(tags),
127 "The tags of compressed cache %s must derive from CompressedTags",
128 name());
129 warn_if(!compressor && dynamic_cast<CompressedTags*>(tags),
130 "Compressed cache %s does not have a compression algorithm", name());
131 if (compressor)
132 compressor->setCache(this);
133 }
134
135 BaseCache::~BaseCache()
136 {
137 delete tempBlock;
138 }
139
140 void
141 BaseCache::CacheResponsePort::setBlocked()
142 {
143 assert(!blocked);
144 DPRINTF(CachePort, "Port is blocking new requests\n");
145 blocked = true;
146 // if we already scheduled a retry in this cycle, but it has not yet
147 // happened, cancel it
148 if (sendRetryEvent.scheduled()) {
149 owner.deschedule(sendRetryEvent);
150 DPRINTF(CachePort, "Port descheduled retry\n");
151 mustSendRetry = true;
152 }
153 }
154
155 void
156 BaseCache::CacheResponsePort::clearBlocked()
157 {
158 assert(blocked);
159 DPRINTF(CachePort, "Port is accepting new requests\n");
160 blocked = false;
161 if (mustSendRetry) {
162 // @TODO: need to find a better time (next cycle?)
163 owner.schedule(sendRetryEvent, curTick() + 1);
164 }
165 }
166
167 void
168 BaseCache::CacheResponsePort::processSendRetry()
169 {
170 DPRINTF(CachePort, "Port is sending retry\n");
171
172 // reset the flag and call retry
173 mustSendRetry = false;
174 sendRetryReq();
175 }
176
177 Addr
178 BaseCache::regenerateBlkAddr(CacheBlk* blk)
179 {
180 if (blk != tempBlock) {
181 return tags->regenerateBlkAddr(blk);
182 } else {
183 return tempBlock->getAddr();
184 }
185 }
186
187 void
188 BaseCache::init()
189 {
190 if (!cpuSidePort.isConnected() || !memSidePort.isConnected())
191 fatal("Cache ports on %s are not connected\n", name());
192 cpuSidePort.sendRangeChange();
193 forwardSnoops = cpuSidePort.isSnooping();
194 }
195
196 Port &
197 BaseCache::getPort(const std::string &if_name, PortID idx)
198 {
199 if (if_name == "mem_side") {
200 return memSidePort;
201 } else if (if_name == "cpu_side") {
202 return cpuSidePort;
203 } else {
204 return ClockedObject::getPort(if_name, idx);
205 }
206 }
207
208 bool
209 BaseCache::inRange(Addr addr) const
210 {
211 for (const auto& r : addrRanges) {
212 if (r.contains(addr)) {
213 return true;
214 }
215 }
216 return false;
217 }
218
219 void
220 BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
221 {
222 if (pkt->needsResponse()) {
223 // These delays should have been consumed by now
224 assert(pkt->headerDelay == 0);
225 assert(pkt->payloadDelay == 0);
226
227 pkt->makeTimingResponse();
228
229 // In this case request_time takes into account the delay of the
230 // xbar, if any, plus lat, while neglecting responseLatency; the
231 // hit latency is thus modelled simply as the value of lat
232 // overridden by access(), which in turn calls the
233 // calculateAccessLatency() function.
234 cpuSidePort.schedTimingResp(pkt, request_time);
235 } else {
236 DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
237 pkt->print());
238
239 // queue the packet for deletion, as the sending cache is
240 // still relying on it; if the block is found in access(),
241 // CleanEvict and Writeback messages will be deleted
242 // here as well
243 pendingDelete.reset(pkt);
244 }
245 }
246
247 void
248 BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
249 Tick forward_time, Tick request_time)
250 {
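// Overview of the cases handled below: if an MSHR already exists for
// this block, the request is coalesced into it (CleanEvicts are sunk,
// WriteCleans go to the write buffer); with no MSHR, evictions and
// WriteCleans are sent to the write buffer, and everything else
// allocates a new MSHR (a write miss to a valid block first marks the
// block unreadable).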
251 if (writeAllocator &&
252 pkt && pkt->isWrite() && !pkt->req->isUncacheable()) {
253 writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(),
254 pkt->getBlockAddr(blkSize));
255 }
256
257 if (mshr) {
258 /// MSHR hit
259 /// @note writebacks will be checked in getNextMSHR()
260 /// for any conflicting requests to the same block
261
262 //@todo remove hw_pf here
263
264 // Coalesce unless it was a software prefetch (see above).
265 if (pkt) {
266 assert(!pkt->isWriteback());
267 // CleanEvicts corresponding to blocks which have
268 // outstanding requests in MSHRs are simply sunk here
269 if (pkt->cmd == MemCmd::CleanEvict) {
270 pendingDelete.reset(pkt);
271 } else if (pkt->cmd == MemCmd::WriteClean) {
272 // A WriteClean should never coalesce with any
273 // outstanding cache maintenance requests.
274
275 // We use forward_time here because there is an
276 // uncached memory write, forwarded to WriteBuffer.
277 allocateWriteBuffer(pkt, forward_time);
278 } else {
279 DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__,
280 pkt->print());
281
282 assert(pkt->req->requestorId() < system->maxRequestors());
283 stats.cmdStats(pkt).mshrHits[pkt->req->requestorId()]++;
284
285 // We use forward_time here because it is the same
286 // for all new targets: we have multiple requests
287 // for the same address here. It covers the
288 // latency to allocate an internal buffer and to
289 // schedule an event to the queued port, and also
290 // takes into account the additional delay of the
291 // xbar.
292 mshr->allocateTarget(pkt, forward_time, order++,
293 allocOnFill(pkt->cmd));
294 if (mshr->getNumTargets() == numTarget) {
295 noTargetMSHR = mshr;
296 setBlocked(Blocked_NoTargets);
297 // need to be careful with this... if this mshr isn't
298 // ready yet (i.e. time > curTick()), we don't want to
299 // move it ahead of mshrs that are ready
300 // mshrQueue.moveToFront(mshr);
301 }
302 }
303 }
304 } else {
305 // no MSHR
306 assert(pkt->req->requestorId() < system->maxRequestors());
307 stats.cmdStats(pkt).mshrMisses[pkt->req->requestorId()]++;
308
309 if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) {
310 // We use forward_time here because there is a
311 // writeback or writeclean, forwarded to WriteBuffer.
312 allocateWriteBuffer(pkt, forward_time);
313 } else {
314 if (blk && blk->isValid()) {
315 // If we have a write miss to a valid block, we
316 // need to mark the block non-readable. Otherwise
317 // if we allow reads while there's an outstanding
318 // write miss, the read could return stale data
319 // out of the cache block... a more aggressive
320 // system could detect the overlap (if any) and
321 // forward data out of the MSHRs, but we don't do
322 // that yet. Note that we do need to leave the
323 // block valid so that it stays in the cache, in
324 // case we get an upgrade response (and hence no
325 // new data) when the write miss completes.
326 // As long as CPUs do proper store/load forwarding
327 // internally, and have a sufficiently weak memory
328 // model, this is probably unnecessary, but at some
329 // point it must have seemed like we needed it...
330 assert((pkt->needsWritable() &&
331 !blk->isSet(CacheBlk::WritableBit)) ||
332 pkt->req->isCacheMaintenance());
333 blk->clearCoherenceBits(CacheBlk::ReadableBit);
334 }
335 // Here we are using forward_time, modelling the latency of
336 // a miss (outbound) just as forwardLatency, neglecting the
337 // lookupLatency component.
338 allocateMissBuffer(pkt, forward_time);
339 }
340 }
341 }
342
343 void
344 BaseCache::recvTimingReq(PacketPtr pkt)
345 {
346 // anything that is merely forwarded pays for the forward latency and
347 // the delay provided by the crossbar
348 Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
349
350 Cycles lat;
351 CacheBlk *blk = nullptr;
352 bool satisfied = false;
353 {
354 PacketList writebacks;
355 // Note that lat is passed by reference here. The function
356 // access() will set the lat value.
357 satisfied = access(pkt, blk, lat, writebacks);
358
359 // After the evicted blocks are selected, they must be forwarded
360 // to the write buffer to ensure they logically precede anything
361 // happening below
362 doWritebacks(writebacks, clockEdge(lat + forwardLatency));
363 }
364
365 // Here we charge the headerDelay, which accounts for the latency of
366 // the bus if the packet arrived over it.
367 // The latency charged is just the value set by the access() function:
368 // in case of a hit we neglect the response latency, and
369 // in case of a miss we neglect the forward latency.
370 Tick request_time = clockEdge(lat);
371 // Here we reset the timing of the packet.
372 pkt->headerDelay = pkt->payloadDelay = 0;
373
374 if (satisfied) {
375 // notify before anything else as later handleTimingReqHit might turn
376 // the packet into a response
377 ppHit->notify(pkt);
378
379 if (prefetcher && blk && blk->wasPrefetched()) {
380 blk->clearPrefetched();
381 }
382
383 handleTimingReqHit(pkt, blk, request_time);
384 } else {
385 handleTimingReqMiss(pkt, blk, forward_time, request_time);
386
387 ppMiss->notify(pkt);
388 }
389
390 if (prefetcher) {
391 // track time of availability of next prefetch, if any
392 Tick next_pf_time = prefetcher->nextPrefetchReadyTime();
393 if (next_pf_time != MaxTick) {
394 schedMemSideSendEvent(next_pf_time);
395 }
396 }
397 }
398
399 void
400 BaseCache::handleUncacheableWriteResp(PacketPtr pkt)
401 {
402 Tick completion_time = clockEdge(responseLatency) +
403 pkt->headerDelay + pkt->payloadDelay;
404
405 // Reset the bus additional time as it is now accounted for
406 pkt->headerDelay = pkt->payloadDelay = 0;
407
408 cpuSidePort.schedTimingResp(pkt, completion_time);
409 }
410
411 void
412 BaseCache::recvTimingResp(PacketPtr pkt)
413 {
414 assert(pkt->isResponse());
415
416 // all header delay should be paid for by the crossbar, unless
417 // this is a prefetch response from above
418 panic_if(pkt->headerDelay != 0 && pkt->cmd != MemCmd::HardPFResp,
419 "%s saw a non-zero packet delay\n", name());
420
421 const bool is_error = pkt->isError();
422
423 if (is_error) {
424 DPRINTF(Cache, "%s: Cache received %s with error\n", __func__,
425 pkt->print());
426 }
427
428 DPRINTF(Cache, "%s: Handling response %s\n", __func__,
429 pkt->print());
430
431 // if this is a write, we should be looking at an uncacheable
432 // write
433 if (pkt->isWrite()) {
434 assert(pkt->req->isUncacheable());
435 handleUncacheableWriteResp(pkt);
436 return;
437 }
438
439 // we have dealt with any (uncacheable) writes above, from here on
440 // we know we are dealing with an MSHR due to a miss or a prefetch
441 MSHR *mshr = dynamic_cast<MSHR*>(pkt->popSenderState());
442 assert(mshr);
443
444 if (mshr == noTargetMSHR) {
445 // we always clear at least one target
446 clearBlocked(Blocked_NoTargets);
447 noTargetMSHR = nullptr;
448 }
449
450 // Initial target is used just for stats
451 const QueueEntry::Target *initial_tgt = mshr->getTarget();
452 const Tick miss_latency = curTick() - initial_tgt->recvTime;
453 if (pkt->req->isUncacheable()) {
454 assert(pkt->req->requestorId() < system->maxRequestors());
455 stats.cmdStats(initial_tgt->pkt)
456 .mshrUncacheableLatency[pkt->req->requestorId()] += miss_latency;
457 } else {
458 assert(pkt->req->requestorId() < system->maxRequestors());
459 stats.cmdStats(initial_tgt->pkt)
460 .mshrMissLatency[pkt->req->requestorId()] += miss_latency;
461 }
462
463 PacketList writebacks;
464
465 bool is_fill = !mshr->isForward &&
466 (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp ||
467 mshr->wasWholeLineWrite);
468
469 // make sure that if the mshr was due to a whole line write then
470 // the response is an invalidation
471 assert(!mshr->wasWholeLineWrite || pkt->isInvalidate());
472
473 CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
474
475 if (is_fill && !is_error) {
476 DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n",
477 pkt->getAddr());
478
479 const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ?
480 writeAllocator->allocate() : mshr->allocOnFill();
481 blk = handleFill(pkt, blk, writebacks, allocate);
482 assert(blk != nullptr);
483 ppFill->notify(pkt);
484 }
485
486 if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
487 // The block was marked not readable while there was a pending
488 // cache maintenance operation; restore its flag.
489 blk->setCoherenceBits(CacheBlk::ReadableBit);
490
491 // This was a cache clean operation (without invalidate)
492 // and we have a copy of the block already. Since there
493 // is no invalidation, we can promote targets that don't
494 // require a writable copy
495 mshr->promoteReadable();
496 }
497
498 if (blk && blk->isSet(CacheBlk::WritableBit) &&
499 !pkt->req->isCacheInvalidate()) {
500 // If at this point the referenced block is writable and the
501 // response is not a cache invalidate, we promote targets that
502 // were deferred as we couldn't guarantee a writable copy
503 mshr->promoteWritable();
504 }
505
506 serviceMSHRTargets(mshr, pkt, blk);
507
508 if (mshr->promoteDeferredTargets()) {
509 // avoid a later read getting stale data while a write miss is
510 // outstanding... see the corresponding comment in handleTimingReqMiss()
511 if (blk) {
512 blk->clearCoherenceBits(CacheBlk::ReadableBit);
513 }
514 mshrQueue.markPending(mshr);
515 schedMemSideSendEvent(clockEdge() + pkt->payloadDelay);
516 } else {
517 // while we deallocate an mshr from the queue we still have to
518 // check the isFull condition before and after as we might
519 // have been using the reserved entries already
520 const bool was_full = mshrQueue.isFull();
521 mshrQueue.deallocate(mshr);
522 if (was_full && !mshrQueue.isFull()) {
523 clearBlocked(Blocked_NoMSHRs);
524 }
525
526 // Request the bus for a prefetch if this deallocation freed enough
527 // MSHRs for a prefetch to take place
528 if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) {
529 Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(),
530 clockEdge());
531 if (next_pf_time != MaxTick)
532 schedMemSideSendEvent(next_pf_time);
533 }
534 }
535
536 // if we used the temp block, check to see if it's valid and then clear it out
537 if (blk == tempBlock && tempBlock->isValid()) {
538 evictBlock(blk, writebacks);
539 }
540
541 const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
542 // copy writebacks to write buffer
543 doWritebacks(writebacks, forward_time);
544
545 DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print());
546 delete pkt;
547 }
548
549
550 Tick
551 BaseCache::recvAtomic(PacketPtr pkt)
552 {
553 // should assert here that there are no outstanding MSHRs or
554 // writebacks... that would mean that someone used an atomic
555 // access in timing mode
556
557 // We use lookupLatency here because it specifies the latency
558 // of the lookup needed to access the cache.
559 Cycles lat = lookupLatency;
560
561 CacheBlk *blk = nullptr;
562 PacketList writebacks;
563 bool satisfied = access(pkt, blk, lat, writebacks);
564
565 if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) {
566 // A cache clean opearation is looking for a dirty
567 // block. If a dirty block is encountered a WriteClean
568 // will update any copies to the path to the memory
569 // until the point of reference.
570 DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
571 __func__, pkt->print(), blk->print());
572 PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
573 writebacks.push_back(wb_pkt);
574 pkt->setSatisfied();
575 }
576
577 // handle writebacks resulting from the access here to ensure they
578 // logically precede anything happening below
579 doWritebacksAtomic(writebacks);
580 assert(writebacks.empty());
581
582 if (!satisfied) {
583 lat += handleAtomicReqMiss(pkt, blk, writebacks);
584 }
585
586 // Note that we don't invoke the prefetcher at all in atomic mode.
587 // It's not clear how to do it properly, particularly for
588 // prefetchers that aggressively generate prefetch candidates and
589 // rely on bandwidth contention to throttle them; these will tend
590 // to pollute the cache in atomic mode since there is no bandwidth
591 // contention. If we ever do want to enable prefetching in atomic
592 // mode, though, this is the place to do it... see timingAccess()
593 // for an example (though we'd want to issue the prefetch(es)
594 // immediately rather than calling requestMemSideBus() as we do
595 // there).
596
597 // do any writebacks resulting from the response handling
598 doWritebacksAtomic(writebacks);
599
600 // if we used the temp block, check to see if it's valid and if so
601 // clear it out, but only do so after the call to recvAtomic is
602 // finished so that any downstream observers (such as a snoop
603 // filter) first see the fill, and only then see the eviction
604 if (blk == tempBlock && tempBlock->isValid()) {
605 // the atomic CPU calls recvAtomic for fetch and load/store
606 // sequentially, and we may already have a tempBlock
607 // writeback from the fetch that we have not yet sent
608 if (tempBlockWriteback) {
609 // if that is the case, write the previous one back, and
610 // do not schedule any new event
611 writebackTempBlockAtomic();
612 } else {
613 // the writeback/clean eviction happens after the call to
614 // recvAtomic has finished (but before any successive
615 // calls), so that the response handling from the fill is
616 // allowed to happen first
617 schedule(writebackTempBlockAtomicEvent, curTick());
618 }
619
620 tempBlockWriteback = evictBlock(blk);
621 }
622
623 if (pkt->needsResponse()) {
624 pkt->makeAtomicResponse();
625 }
626
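// lat is in cycles; multiplying by clockPeriod() converts it to ticks.
// For example (illustrative numbers only): 10 cycles on a 1 GHz clock
// is 10 ns, i.e. 10000 ticks at gem5's default 1 ps tick resolution.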
627 return lat * clockPeriod();
628 }
629
630 void
631 BaseCache::functionalAccess(PacketPtr pkt, bool from_cpu_side)
632 {
633 Addr blk_addr = pkt->getBlockAddr(blkSize);
634 bool is_secure = pkt->isSecure();
635 CacheBlk *blk = tags->findBlock(pkt->getAddr(), is_secure);
636 MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
637
638 pkt->pushLabel(name());
639
640 CacheBlkPrintWrapper cbpw(blk);
641
642 // Note that just because an L2/L3 has valid data doesn't mean an
643 // L1 doesn't have a more up-to-date modified copy that still
644 // needs to be found. As a result we always update the request if
645 // we have it, but only declare it satisfied if we are the owner.
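// (e.g. a cache holding only a clean Shared copy fills in the data but
// does not mark the access done, whereas a cache holding the line dirty,
// or with an in-service MSHR pending a modified copy, does)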
646
647 // see if we have data at all (owned or otherwise)
648 bool have_data = blk && blk->isValid()
649 && pkt->trySatisfyFunctional(&cbpw, blk_addr, is_secure, blkSize,
650 blk->data);
651
652 // data we have is dirty if marked as such or if we have an
653 // in-service MSHR that is pending a modified line
654 bool have_dirty =
655 have_data && (blk->isSet(CacheBlk::DirtyBit) ||
656 (mshr && mshr->inService && mshr->isPendingModified()));
657
658 bool done = have_dirty ||
659 cpuSidePort.trySatisfyFunctional(pkt) ||
660 mshrQueue.trySatisfyFunctional(pkt) ||
661 writeBuffer.trySatisfyFunctional(pkt) ||
662 memSidePort.trySatisfyFunctional(pkt);
663
664 DPRINTF(CacheVerbose, "%s: %s %s%s%s\n", __func__, pkt->print(),
665 (blk && blk->isValid()) ? "valid " : "",
666 have_data ? "data " : "", done ? "done " : "");
667
668 // We're leaving the cache, so pop cache->name() label
669 pkt->popLabel();
670
671 if (done) {
672 pkt->makeResponse();
673 } else {
674 // if it came as a request from the CPU side then make sure it
675 // continues towards the memory side
676 if (from_cpu_side) {
677 memSidePort.sendFunctional(pkt);
678 } else if (cpuSidePort.isSnooping()) {
679 // if it came from the memory side, it must be a snoop request
680 // and we should only forward it if we are forwarding snoops
681 cpuSidePort.sendFunctionalSnoop(pkt);
682 }
683 }
684 }
685
686 void
687 BaseCache::updateBlockData(CacheBlk *blk, const PacketPtr cpkt,
688 bool has_old_data)
689 {
690 DataUpdate data_update(regenerateBlkAddr(blk), blk->isSecure());
691 if (ppDataUpdate->hasListeners()) {
692 if (has_old_data) {
693 data_update.oldData = std::vector<uint64_t>(blk->data,
694 blk->data + (blkSize / sizeof(uint64_t)));
695 }
696 }
697
698 // Actually perform the data update
699 if (cpkt) {
700 cpkt->writeDataToBlock(blk->data, blkSize);
701 }
702
703 if (ppDataUpdate->hasListeners()) {
704 if (cpkt) {
705 data_update.newData = std::vector<uint64_t>(blk->data,
706 blk->data + (blkSize / sizeof(uint64_t)));
707 }
708 ppDataUpdate->notify(data_update);
709 }
710 }
711
712 void
713 BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt)
714 {
715 assert(pkt->isRequest());
716
717 uint64_t overwrite_val;
718 bool overwrite_mem;
719 uint64_t condition_val64;
720 uint32_t condition_val32;
721
722 int offset = pkt->getOffset(blkSize);
723 uint8_t *blk_data = blk->data + offset;
724
725 assert(sizeof(uint64_t) >= pkt->getSize());
726
727 // Get a copy of the old block's contents for the probe before the update
728 DataUpdate data_update(regenerateBlkAddr(blk), blk->isSecure());
729 if (ppDataUpdate->hasListeners()) {
730 data_update.oldData = std::vector<uint64_t>(blk->data,
731 blk->data + (blkSize / sizeof(uint64_t)));
732 }
733
734 overwrite_mem = true;
735 // keep a copy of our possible write value, and copy what is at the
736 // memory address into the packet
737 pkt->writeData((uint8_t *)&overwrite_val);
738 pkt->setData(blk_data);
739
740 if (pkt->req->isCondSwap()) {
741 if (pkt->getSize() == sizeof(uint64_t)) {
742 condition_val64 = pkt->req->getExtraData();
743 overwrite_mem = !std::memcmp(&condition_val64, blk_data,
744 sizeof(uint64_t));
745 } else if (pkt->getSize() == sizeof(uint32_t)) {
746 condition_val32 = (uint32_t)pkt->req->getExtraData();
747 overwrite_mem = !std::memcmp(&condition_val32, blk_data,
748 sizeof(uint32_t));
749 } else
750 panic("Invalid size for conditional read/write\n");
751 }
752
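// At this point the packet already carries the old memory value, and
// overwrite_mem tells us whether to commit the swap: always for an
// unconditional SwapReq, and only on a matching compare for a
// conditional one.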
753 if (overwrite_mem) {
754 std::memcpy(blk_data, &overwrite_val, pkt->getSize());
755 blk->setCoherenceBits(CacheBlk::DirtyBit);
756
757 if (ppDataUpdate->hasListeners()) {
758 data_update.newData = std::vector<uint64_t>(blk->data,
759 blk->data + (blkSize / sizeof(uint64_t)));
760 ppDataUpdate->notify(data_update);
761 }
762 }
763 }
764
765 QueueEntry*
766 BaseCache::getNextQueueEntry()
767 {
768 // Check both MSHR queue and write buffer for potential requests,
769 // note that null does not mean there is no request; it could
770 // simply be that it is not ready
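// The arbitration below: a ready write is chosen when the write buffer
// is full or no miss is ready, unless an older conflicting miss must be
// serviced first; otherwise a ready miss is chosen, unless a conflicting
// writeback is pending; with neither available, a prefetch is attempted
// if an MSHR slot is free.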
771 MSHR *miss_mshr = mshrQueue.getNext();
772 WriteQueueEntry *wq_entry = writeBuffer.getNext();
773
774 // If we got a write buffer request ready, first priority is a
775 // full write buffer, otherwise we favour the miss requests
776 if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) {
777 // need to search MSHR queue for conflicting earlier miss.
778 MSHR *conflict_mshr = mshrQueue.findPending(wq_entry);
779
780 if (conflict_mshr && conflict_mshr->order < wq_entry->order) {
781 // Service misses in order until conflict is cleared.
782 return conflict_mshr;
783
784 // @todo Note that we ignore the ready time of the conflict here
785 }
786
787 // No conflicts; issue write
788 return wq_entry;
789 } else if (miss_mshr) {
790 // need to check for conflicting earlier writeback
791 WriteQueueEntry *conflict_mshr = writeBuffer.findPending(miss_mshr);
792 if (conflict_mshr) {
793 // not sure why we don't check order here... it was in the
794 // original code but commented out.
795
796 // The only way this happens is if we are
797 // doing a write, didn't have permissions, and
798 // then subsequently saw a writeback (owned got evicted).
799 // We need to make sure to perform the writeback first
800 // to preserve the dirty data; then we can issue the write
801
802 // should we return wq_entry here instead? I.e. do we
803 // have to flush writes in order? I don't think so... not
804 // for Alpha anyway. Maybe for x86?
805 return conflict_mshr;
806
807 // @todo Note that we ignore the ready time of the conflict here
808 }
809
810 // No conflicts; issue read
811 return miss_mshr;
812 }
813
814 // fall through... no pending requests. Try a prefetch.
815 assert(!miss_mshr && !wq_entry);
816 if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) {
817 // If we have a miss queue slot, we can try a prefetch
818 PacketPtr pkt = prefetcher->getPacket();
819 if (pkt) {
820 Addr pf_addr = pkt->getBlockAddr(blkSize);
821 if (!tags->findBlock(pf_addr, pkt->isSecure()) &&
822 !mshrQueue.findMatch(pf_addr, pkt->isSecure()) &&
823 !writeBuffer.findMatch(pf_addr, pkt->isSecure())) {
824 // Update statistic on number of prefetches issued
825 // (hwpf_mshr_misses)
826 assert(pkt->req->requestorId() < system->maxRequestors());
827 stats.cmdStats(pkt).mshrMisses[pkt->req->requestorId()]++;
828
829 // allocate an MSHR and return it, note
830 // that we send the packet straight away, so do not
831 // schedule the send
832 return allocateMissBuffer(pkt, curTick(), false);
833 } else {
834 // free the request and packet
835 delete pkt;
836 }
837 }
838 }
839
840 return nullptr;
841 }
842
843 bool
844 BaseCache::handleEvictions(std::vector<CacheBlk*> &evict_blks,
845 PacketList &writebacks)
846 {
847 bool replacement = false;
848 for (const auto& blk : evict_blks) {
849 if (blk->isValid()) {
850 replacement = true;
851
852 const MSHR* mshr =
853 mshrQueue.findMatch(regenerateBlkAddr(blk), blk->isSecure());
854 if (mshr) {
855 // Must be an outstanding upgrade or clean request on a block
856 // we're about to replace
857 assert((!blk->isSet(CacheBlk::WritableBit) &&
858 mshr->needsWritable()) || mshr->isCleaning());
859 return false;
860 }
861 }
862 }
863
864 // The victim will be replaced by a new entry, so increase the replacement
865 // counter if a valid block is being replaced
866 if (replacement) {
867 stats.replacements++;
868
869 // Evict valid blocks associated to this victim block
870 for (auto& blk : evict_blks) {
871 if (blk->isValid()) {
872 evictBlock(blk, writebacks);
873 }
874 }
875 }
876
877 return true;
878 }
879
880 bool
881 BaseCache::updateCompressionData(CacheBlk *&blk, const uint64_t* data,
882 PacketList &writebacks)
883 {
884 // tempBlock does not exist in the tags, so don't do anything for it.
885 if (blk == tempBlock) {
886 return true;
887 }
888
889 // The compressor is called to compress the updated data, so that its
890 // metadata can be updated.
891 Cycles compression_lat = Cycles(0);
892 Cycles decompression_lat = Cycles(0);
893 const auto comp_data =
894 compressor->compress(data, compression_lat, decompression_lat);
895 std::size_t compression_size = comp_data->getSizeBits();
896
897 // Get previous compressed size
898 CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk);
899 M5_VAR_USED const std::size_t prev_size = compression_blk->getSizeBits();
900
901 // If compressed size didn't change enough to modify its co-allocatability
902 // there is nothing to do. Otherwise we may be facing a data expansion
903 // (block passing from more compressed to less compressed state), or a
904 // data contraction (less to more).
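// For example (hypothetical sizes): a block stored compressed in 256
// bits that now compresses to 384 bits has expanded and may no longer
// co-allocate with its superblock neighbours, while one going from 384
// down to 256 bits has contracted and could be moved to co-allocate.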
905 bool is_data_expansion = false;
906 bool is_data_contraction = false;
907 const CompressionBlk::OverwriteType overwrite_type =
908 compression_blk->checkExpansionContraction(compression_size);
909 std::string op_name = "";
910 if (overwrite_type == CompressionBlk::DATA_EXPANSION) {
911 op_name = "expansion";
912 is_data_expansion = true;
913 } else if ((overwrite_type == CompressionBlk::DATA_CONTRACTION) &&
914 moveContractions) {
915 op_name = "contraction";
916 is_data_contraction = true;
917 }
918
919 // If block changed compression state, it was possibly co-allocated with
920 // other blocks and cannot be co-allocated anymore, so one or more blocks
921 // must be evicted to make room for the expanded/contracted block
922 std::vector<CacheBlk*> evict_blks;
923 if (is_data_expansion || is_data_contraction) {
925 bool victim_itself = false;
926 CacheBlk *victim = nullptr;
927 if (replaceExpansions || is_data_contraction) {
928 victim = tags->findVictim(regenerateBlkAddr(blk),
929 blk->isSecure(), compression_size, evict_blks);
930
931 // It is valid to return nullptr if there is no victim
932 if (!victim) {
933 return false;
934 }
935
936 // If the victim is the block itself, the block won't need to be
937 // moved, and the victim should not be evicted
938 if (blk == victim) {
939 victim_itself = true;
940 auto it = std::find_if(evict_blks.begin(), evict_blks.end(),
941 [&blk](CacheBlk* evict_blk){ return evict_blk == blk; });
942 evict_blks.erase(it);
943 }
944
945 // Print victim block's information
946 DPRINTF(CacheRepl, "Data %s replacement victim: %s\n",
947 op_name, victim->print());
948 } else {
949 // If we do not move the expanded block, we must make room for
950 // the expansion to happen, so evict every co-allocated block
951 const SuperBlk* superblock = static_cast<const SuperBlk*>(
952 compression_blk->getSectorBlock());
953 for (auto& sub_blk : superblock->blks) {
954 if (sub_blk->isValid() && (blk != sub_blk)) {
955 evict_blks.push_back(sub_blk);
956 }
957 }
958 }
959
960 // Try to evict blocks; if it fails, give up on update
961 if (!handleEvictions(evict_blks, writebacks)) {
962 return false;
963 }
964
965 DPRINTF(CacheComp, "Data %s: [%s] from %d to %d bits\n",
966 op_name, blk->print(), prev_size, compression_size);
967
968 if (!victim_itself && (replaceExpansions || is_data_contraction)) {
969 // Move the block's contents to the invalid block so that it now
970 // co-allocates with the other existing superblock entry
971 tags->moveBlock(blk, victim);
972 blk = victim;
973 compression_blk = static_cast<CompressionBlk*>(blk);
974 }
975 }
976
977 // Update the number of data expansions/contractions
978 if (is_data_expansion) {
979 stats.dataExpansions++;
980 } else if (is_data_contraction) {
981 stats.dataContractions++;
982 }
983
984 compression_blk->setSizeBits(compression_size);
985 compression_blk->setDecompressionLatency(decompression_lat);
986
987 return true;
988 }
989
990 void
991 BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
992 {
993 assert(pkt->isRequest());
994
995 assert(blk && blk->isValid());
996 // Occasionally this is not true... if we are a lower-level cache
997 // satisfying a string of Read and ReadEx requests from
998 // upper-level caches, a Read will mark the block as shared but we
999 // can satisfy a following ReadEx anyway since we can rely on the
1000 // Read requestor(s) to have buffered the ReadEx snoop and to
1001 // invalidate their blocks after receiving them.
1002 // assert(!pkt->needsWritable() || blk->isSet(CacheBlk::WritableBit));
1003 assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
1004
1005 // Check RMW operations first since both isRead() and
1006 // isWrite() will be true for them
1007 if (pkt->cmd == MemCmd::SwapReq) {
1008 if (pkt->isAtomicOp()) {
1009 // Get a copy of the old block's contents for the probe before
1010 // the update
1011 DataUpdate data_update(regenerateBlkAddr(blk), blk->isSecure());
1012 if (ppDataUpdate->hasListeners()) {
1013 data_update.oldData = std::vector<uint64_t>(blk->data,
1014 blk->data + (blkSize / sizeof(uint64_t)));
1015 }
1016
1017 // extract data from cache and save it into the data field in
1018 // the packet as a return value from this atomic op
1019 int offset = tags->extractBlkOffset(pkt->getAddr());
1020 uint8_t *blk_data = blk->data + offset;
1021 pkt->setData(blk_data);
1022
1023 // execute AMO operation
1024 (*(pkt->getAtomicOp()))(blk_data);
1025
1026 // Inform of this block's data contents update
1027 if (ppDataUpdate->hasListeners()) {
1028 data_update.newData = std::vector<uint64_t>(blk->data,
1029 blk->data + (blkSize / sizeof(uint64_t)));
1030 ppDataUpdate->notify(data_update);
1031 }
1032
1033 // set block status to dirty
1034 blk->setCoherenceBits(CacheBlk::DirtyBit);
1035 } else {
1036 cmpAndSwap(blk, pkt);
1037 }
1038 } else if (pkt->isWrite()) {
1039 // we have the block in a writable state and can go ahead,
1040 // note that the line may also be considered writable in
1041 // downstream caches along the path to memory, but always
1042 // Exclusive, and never Modified
1043 assert(blk->isSet(CacheBlk::WritableBit));
1044 // Write or WriteLine at the first cache with block in writable state
1045 if (blk->checkWrite(pkt)) {
1046 updateBlockData(blk, pkt, true);
1047 }
1048 // Always mark the line as dirty (and thus transition to the
1049 // Modified state) even if we are a failed StoreCond so we
1050 // supply data to any snoops that have appended themselves to
1051 // this cache before knowing the store will fail.
1052 blk->setCoherenceBits(CacheBlk::DirtyBit);
1053 DPRINTF(CacheVerbose, "%s for %s (write)\n", __func__, pkt->print());
1054 } else if (pkt->isRead()) {
1055 if (pkt->isLLSC()) {
1056 blk->trackLoadLocked(pkt);
1057 }
1058
1059 // all read responses have a data payload
1060 assert(pkt->hasRespData());
1061 pkt->setDataFromBlock(blk->data, blkSize);
1062 } else if (pkt->isUpgrade()) {
1063 // sanity check
1064 assert(!pkt->hasSharers());
1065
1066 if (blk->isSet(CacheBlk::DirtyBit)) {
1067 // we were in the Owned state, and a cache above us that
1068 // has the line in Shared state needs to be made aware
1069 // that the data it already has is in fact dirty
1070 pkt->setCacheResponding();
1071 blk->clearCoherenceBits(CacheBlk::DirtyBit);
1072 }
1073 } else if (pkt->isClean()) {
1074 blk->clearCoherenceBits(CacheBlk::DirtyBit);
1075 } else {
1076 assert(pkt->isInvalidate());
1077 invalidateBlock(blk);
1078 DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__,
1079 pkt->print());
1080 }
1081 }
1082
1083 /////////////////////////////////////////////////////
1084 //
1085 // Access path: requests coming in from the CPU side
1086 //
1087 /////////////////////////////////////////////////////
1088 Cycles
1089 BaseCache::calculateTagOnlyLatency(const uint32_t delay,
1090 const Cycles lookup_lat) const
1091 {
1092 // A tag-only access has to wait for the packet to arrive in order to
1093 // perform the tag lookup.
1094 return ticksToCycles(delay) + lookup_lat;
1095 }
1096
1097 Cycles
1098 BaseCache::calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
1099 const Cycles lookup_lat) const
1100 {
1101 Cycles lat(0);
1102
1103 if (blk != nullptr) {
1104 // As soon as the access arrives, a sequential access first accesses
1105 // the tags and then the data entry. For parallel accesses the
1106 // latency is dictated by the slower of the tag and data latencies.
1107 if (sequentialAccess) {
1108 lat = ticksToCycles(delay) + lookup_lat + dataLatency;
1109 } else {
1110 lat = ticksToCycles(delay) + std::max(lookup_lat, dataLatency);
1111 }
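// For example (illustrative latencies): with lookup_lat = 2 and
// dataLatency = 3, a sequential hit costs ticksToCycles(delay) + 5
// cycles, whereas a parallel hit costs ticksToCycles(delay) + 3 cycles.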
1112
1113 // Check if the block to be accessed is available. If not, apply the
1114 // access latency on top of when the block is ready to be accessed.
1115 const Tick tick = curTick() + delay;
1116 const Tick when_ready = blk->getWhenReady();
1117 if (when_ready > tick &&
1118 ticksToCycles(when_ready - tick) > lat) {
1119 lat += ticksToCycles(when_ready - tick);
1120 }
1121 } else {
1122 // In case of a miss, we neglect the data access in a parallel
1123 // configuration (i.e., the data access will be stopped as soon as
1124 // we find out it is a miss), and use the tag-only latency.
1125 lat = calculateTagOnlyLatency(delay, lookup_lat);
1126 }
1127
1128 return lat;
1129 }
1130
1131 bool
1132 BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
1133 PacketList &writebacks)
1134 {
1135 // sanity check
1136 assert(pkt->isRequest());
1137
1138 chatty_assert(!(isReadOnly && pkt->isWrite()),
1139 "Should never see a write in a read-only cache %s\n",
1140 name());
1141
1142 // Access block in the tags
1143 Cycles tag_latency(0);
1144 blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency);
1145
1146 DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
1147 blk ? "hit " + blk->print() : "miss");
1148
1149 if (pkt->req->isCacheMaintenance()) {
1150 // A cache maintenance operation is always forwarded to the
1151 // memory below even if the block is found in dirty state.
1152
1153 // We defer any changes to the state of the block until we
1154 // create and mark as in service the mshr for the downstream
1155 // packet.
1156
1157 // Calculate access latency on top of when the packet arrives. This
1158 // takes into account the bus delay.
1159 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1160
1161 return false;
1162 }
1163
1164 if (pkt->isEviction()) {
1165 // We check for presence of block in above caches before issuing
1166 // Writeback or CleanEvict to write buffer. Therefore the only
1167 // possible case is a CleanEvict packet coming from above and
1168 // encountering a Writeback generated in this cache that is still
1169 // waiting in the write buffer. Cases of upper level peer caches
1170 // generating CleanEvict and Writeback or simply CleanEvict and
1171 // CleanEvict almost simultaneously will be caught by snoops sent out
1172 // by crossbar.
1173 WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(),
1174 pkt->isSecure());
1175 if (wb_entry) {
1176 assert(wb_entry->getNumTargets() == 1);
1177 PacketPtr wbPkt = wb_entry->getTarget()->pkt;
1178 assert(wbPkt->isWriteback());
1179
1180 if (pkt->isCleanEviction()) {
1181 // The CleanEvict and WritebackClean snoop into other
1182 // peer caches of the same level while traversing the
1183 // crossbar. If a copy of the block is found, the
1184 // packet is deleted in the crossbar. Hence, none of
1185 // the other upper level caches connected to this
1186 // cache have the block, so we can clear the
1187 // BLOCK_CACHED flag in the Writeback if set and
1188 // discard the CleanEvict by returning true.
1189 wbPkt->clearBlockCached();
1190
1191 // A clean evict does not need to access the data array
1192 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1193
1194 return true;
1195 } else {
1196 assert(pkt->cmd == MemCmd::WritebackDirty);
1197 // Dirty writeback from above trumps our clean
1198 // writeback... discard here
1199 // Note: markInService will remove entry from writeback buffer.
1200 markInService(wb_entry);
1201 delete wbPkt;
1202 }
1203 }
1204 }
1205
1206 // The critical latency part of a write depends only on the tag access
1207 if (pkt->isWrite()) {
1208 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1209 }
1210
1211 // Writeback handling is special case. We can write the block into
1212 // the cache without having a writeable copy (or any copy at all).
1213 if (pkt->isWriteback()) {
1214 assert(blkSize == pkt->getSize());
1215
1216 // we could get a clean writeback while we are having
1217 // outstanding accesses to a block, do the simple thing for
1218 // now and drop the clean writeback so that we do not upset
1219 // any ordering/decisions about ownership already taken
1220 if (pkt->cmd == MemCmd::WritebackClean &&
1221 mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) {
1222 DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, "
1223 "dropping\n", pkt->getAddr());
1224
1225 // A writeback searches for the block, then writes the data.
1226 // As the writeback is being dropped, the data is not touched,
1227 // and we just had to wait for the time to find a match in the
1228 // MSHR. As of now assume an MSHR queue search takes as long as
1229 // a tag lookup for simplicity.
1230 return true;
1231 }
1232
1233 const bool has_old_data = blk && blk->isValid();
1234 if (!blk) {
1235 // need to do a replacement
1236 blk = allocateBlock(pkt, writebacks);
1237 if (!blk) {
1238 // no replaceable block available: give up, fwd to next level.
1239 incMissCount(pkt);
1240 return false;
1241 }
1242
1243 blk->setCoherenceBits(CacheBlk::ReadableBit);
1244 } else if (compressor) {
1245 // This is an overwrite to an existing block, therefore we need
1246 // to check for data expansion (i.e., block was compressed with
1247 // a smaller size, and now it doesn't fit the entry anymore).
1248 // If that is the case we might need to evict blocks.
1249 if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
1250 writebacks)) {
1251 invalidateBlock(blk);
1252 return false;
1253 }
1254 }
1255
1256 // only mark the block dirty if we got a writeback command,
1257 // and leave it as is for a clean writeback
1258 if (pkt->cmd == MemCmd::WritebackDirty) {
1259 // TODO: the coherent cache can assert that the dirty bit is set
1260 blk->setCoherenceBits(CacheBlk::DirtyBit);
1261 }
1262 // if the packet does not have sharers, it is passing
1263 // writable, and we got the writeback in Modified or Exclusive
1264 // state; if not, we are in the Owned or Shared state
1265 if (!pkt->hasSharers()) {
1266 blk->setCoherenceBits(CacheBlk::WritableBit);
1267 }
1268 // nothing else to do; writeback doesn't expect response
1269 assert(!pkt->needsResponse());
1270
1271 updateBlockData(blk, pkt, has_old_data);
1272 DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1273 incHitCount(pkt);
1274
1275 // When the packet metadata arrives, the tag lookup will be done while
1276 // the payload is arriving. Then the block will be ready to access as
1277 // soon as the fill is done
1278 blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1279 std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
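// For example (illustrative values, 1 ns clock): with fillLatency = 2,
// headerDelay = 1000 ticks, a 2-cycle tag lookup (2000 ticks) and a
// payloadDelay of 5000 ticks, the block becomes accessible at
// clockEdge(2) + 1000 + max(2000, 5000) = clockEdge(2) + 6000 ticks.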
1280
1281 return true;
1282 } else if (pkt->cmd == MemCmd::CleanEvict) {
1283 // A CleanEvict does not need to access the data array
1284 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1285
1286 if (blk) {
1287 // Found the block in the tags, need to stop CleanEvict from
1288 // propagating further down the hierarchy. Returning true will
1289 // treat the CleanEvict like a satisfied write request and delete
1290 // it.
1291 return true;
1292 }
1293 // We didn't find the block here, propagate the CleanEvict further
1294 // down the memory hierarchy. Returning false will treat the CleanEvict
1295 // like a Writeback which could not find a replaceable block so has to
1296 // go to next level.
1297 return false;
1298 } else if (pkt->cmd == MemCmd::WriteClean) {
1299 // WriteClean handling is a special case. We can allocate a
1300 // block directly if it doesn't exist and we can update the
1301 // block immediately. The WriteClean transfers the ownership
1302 // of the block as well.
1303 assert(blkSize == pkt->getSize());
1304
1305 const bool has_old_data = blk && blk->isValid();
1306 if (!blk) {
1307 if (pkt->writeThrough()) {
1308 // if this is a write through packet, we don't try to
1309 // allocate if the block is not present
1310 return false;
1311 } else {
1312 // a writeback that misses needs to allocate a new block
1313 blk = allocateBlock(pkt, writebacks);
1314 if (!blk) {
1315 // no replaceable block available: give up, fwd to
1316 // next level.
1317 incMissCount(pkt);
1318 return false;
1319 }
1320
1321 blk->setCoherenceBits(CacheBlk::ReadableBit);
1322 }
1323 } else if (compressor) {
1324 // This is an overwrite to an existing block, therefore we need
1325 // to check for data expansion (i.e., block was compressed with
1326 // a smaller size, and now it doesn't fit the entry anymore).
1327 // If that is the case we might need to evict blocks.
1328 if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
1329 writebacks)) {
1330 invalidateBlock(blk);
1331 return false;
1332 }
1333 }
1334
1335 // at this point either this is a writeback or a write-through
1336 // write clean operation and the block is already in this
1337 // cache, we need to update the data and the block flags
1338 assert(blk);
1339 // TODO: the coherent cache can assert that the dirty bit is set
1340 if (!pkt->writeThrough()) {
1341 blk->setCoherenceBits(CacheBlk::DirtyBit);
1342 }
1343 // nothing else to do; writeback doesn't expect response
1344 assert(!pkt->needsResponse());
1345
1346 updateBlockData(blk, pkt, has_old_data);
1347 DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1348
1349 incHitCount(pkt);
1350
1351 // When the packet metadata arrives, the tag lookup will be done while
1352 // the payload is arriving. Then the block will be ready to access as
1353 // soon as the fill is done
1354 blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1355 std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1356
1357 // If this a write-through packet it will be sent to cache below
1358 return !pkt->writeThrough();
1359 } else if (blk && (pkt->needsWritable() ?
1360 blk->isSet(CacheBlk::WritableBit) :
1361 blk->isSet(CacheBlk::ReadableBit))) {
1362 // OK to satisfy access
1363 incHitCount(pkt);
1364
1365 // Calculate access latency based on the need to access the data array
1366 if (pkt->isRead()) {
1367 lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1368
1369 // When a block is compressed, it must first be decompressed
1370 // before being read. This adds to the access latency.
1371 if (compressor) {
1372 lat += compressor->getDecompressionLatency(blk);
1373 }
1374 } else {
1375 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1376 }
1377
1378 satisfyRequest(pkt, blk);
1379 maintainClusivity(pkt->fromCache(), blk);
1380
1381 return true;
1382 }
1383
1384 // Can't satisfy access normally... either no block (blk == nullptr)
1385 // or have block but need writable
1386
1387 incMissCount(pkt);
1388
1389 lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1390
1391 if (!blk && pkt->isLLSC() && pkt->isWrite()) {
1392 // complete miss on store conditional... just give up now
1393 pkt->req->setExtraData(0);
1394 return true;
1395 }
1396
1397 return false;
1398 }
1399
1400 void
1401 BaseCache::maintainClusivity(bool from_cache, CacheBlk *blk)
1402 {
1403 if (from_cache && blk && blk->isValid() &&
1404 !blk->isSet(CacheBlk::DirtyBit) && clusivity == Enums::mostly_excl) {
1405 // if we have responded to a cache, and our block is still
1406 // valid, but not dirty, and this cache is mostly exclusive
1407 // with respect to the cache above, drop the block
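// (e.g. in a mostly-exclusive L2, once the cache above has fetched a
// clean line there is little point in keeping a duplicate copy here)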
1408 invalidateBlock(blk);
1409 }
1410 }
1411
1412 CacheBlk*
1413 BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
1414 bool allocate)
1415 {
1416 assert(pkt->isResponse());
1417 Addr addr = pkt->getAddr();
1418 bool is_secure = pkt->isSecure();
1419 const bool has_old_data = blk && blk->isValid();
1420 #if TRACING_ON
1421 const std::string old_state = blk ? blk->print() : "";
1422 #endif
1423
1424 // When handling a fill, we should have no writes to this line.
1425 assert(addr == pkt->getBlockAddr(blkSize));
1426 assert(!writeBuffer.findMatch(addr, is_secure));
1427
1428 if (!blk) {
1429 // better have read new data...
1430 assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp);
1431
1432 // need to do a replacement if allocating, otherwise we stick
1433 // with the temporary storage
1434 blk = allocate ? allocateBlock(pkt, writebacks) : nullptr;
1435
1436 if (!blk) {
1437 // No replaceable block or a mostly exclusive
1438 // cache... just use temporary storage to complete the
1439 // current request and then get rid of it
1440 blk = tempBlock;
1441 tempBlock->insert(addr, is_secure);
1442 DPRINTF(Cache, "using temp block for %#llx (%s)\n", addr,
1443 is_secure ? "s" : "ns");
1444 }
1445 } else {
1446 // existing block... probably an upgrade
1447 // don't clear block status... if block is already dirty we
1448 // don't want to lose that
1449 }
1450
1451 // Block is guaranteed to be valid at this point
1452 assert(blk->isValid());
1453 assert(blk->isSecure() == is_secure);
1454 assert(regenerateBlkAddr(blk) == addr);
1455
1456 blk->setCoherenceBits(CacheBlk::ReadableBit);
1457
1458 // sanity check for whole-line writes, which should always be
1459 // marked as writable as part of the fill, and then later marked
1460 // dirty as part of satisfyRequest
1461 if (pkt->cmd == MemCmd::InvalidateResp) {
1462 assert(!pkt->hasSharers());
1463 }
1464
1465 // here we deal with setting the appropriate state of the line,
1466 // and we start by looking at the hasSharers flag, and ignore the
1467 // cacheResponding flag (normally signalling dirty data) if the
1468 // packet has sharers, thus the line is never allocated as Owned
1469 // (dirty but not writable), and always ends up being either
1470 // Shared, Exclusive or Modified, see Packet::setCacheResponding
1471 // for more details
1472 if (!pkt->hasSharers()) {
1473 // we could get a writable line from memory (rather than a
1474 // cache) even in a read-only cache, note that we set this bit
1475 // even for a read-only cache, possibly revisit this decision
1476 blk->setCoherenceBits(CacheBlk::WritableBit);
1477
1478 // check if we got this via cache-to-cache transfer (i.e., from a
1479 // cache that had the block in Modified or Owned state)
1480 if (pkt->cacheResponding()) {
1481 // we got the block in Modified state, and invalidated the
1482 // owner's copy
1483 blk->setCoherenceBits(CacheBlk::DirtyBit);
1484
1485 chatty_assert(!isReadOnly, "Should never see dirty snoop response "
1486 "in read-only cache %s\n", name());
1487
1488 }
1489 }
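// Net effect on the line state: hasSharers set -> Shared; hasSharers
// clear with no cache responding -> Exclusive (clean, writable);
// hasSharers clear with a cache responding -> Modified (dirty, writable).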
1490
1491 DPRINTF(Cache, "Block addr %#llx (%s) moving from %s to %s\n",
1492 addr, is_secure ? "s" : "ns", old_state, blk->print());
1493
1494 // if we got new data, copy it in (checking for a read response
1495 // and a response that has data is the same in the end)
1496 if (pkt->isRead()) {
1497 // sanity checks
1498 assert(pkt->hasData());
1499 assert(pkt->getSize() == blkSize);
1500
1501 updateBlockData(blk, pkt, has_old_data);
1502 }
1503 // The block will be ready when the payload arrives and the fill is done
1504 blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1505 pkt->payloadDelay);
1506
1507 return blk;
1508 }
1509
1510 CacheBlk*
1511 BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks)
1512 {
1513 // Get address
1514 const Addr addr = pkt->getAddr();
1515
1516 // Get secure bit
1517 const bool is_secure = pkt->isSecure();
1518
1519 // Block size and compression related access latency. Only relevant if
1520 // using a compressor, otherwise there is no extra delay, and the block
1521 // is fully sized
1522 std::size_t blk_size_bits = blkSize*8;
1523 Cycles compression_lat = Cycles(0);
1524 Cycles decompression_lat = Cycles(0);
1525
1526 // If a compressor is being used, it is called to compress data before
1527 // insertion. Although in Gem5 the data is stored uncompressed, even if a
1528 // compressor is used, the compression/decompression methods are called to
1529 // calculate the amount of extra cycles needed to read or write compressed
1530 // blocks.
1531 if (compressor && pkt->hasData()) {
1532 const auto comp_data = compressor->compress(
1533 pkt->getConstPtr<uint64_t>(), compression_lat, decompression_lat);
1534 blk_size_bits = comp_data->getSizeBits();
1535 }
1536
1537 // Find replacement victim
1538 std::vector<CacheBlk*> evict_blks;
1539 CacheBlk *victim = tags->findVictim(addr, is_secure, blk_size_bits,
1540 evict_blks);
1541
1542 // It is valid to return nullptr if there is no victim
1543 if (!victim)
1544 return nullptr;
1545
1546 // Print victim block's information
1547 DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print());
1548
1549 // Try to evict blocks; if it fails, give up on allocation
1550 if (!handleEvictions(evict_blks, writebacks)) {
1551 return nullptr;
1552 }
1553
1554 // Insert new block at victimized entry
1555 tags->insertBlock(pkt, victim);
1556
1557 // If using a compressor, set compression data. This must be done after
1558 // insertion, as the compression bit may be set.
1559 if (compressor) {
1560 compressor->setSizeBits(victim, blk_size_bits);
1561 compressor->setDecompressionLatency(victim, decompression_lat);
1562 }
1563
1564 return victim;
1565 }
1566
1567 void
1568 BaseCache::invalidateBlock(CacheBlk *blk)
1569 {
1570 // If block is still marked as prefetched, then it hasn't been used
1571 if (blk->wasPrefetched()) {
1572 stats.unusedPrefetches++;
1573 }
1574
1575 // Notify that the data contents for this address are no longer present
1576 updateBlockData(blk, nullptr, blk->isValid());
1577
1578 // If handling a block present in the Tags, let it do its invalidation
1579 // process, which will update stats and invalidate the block itself
1580 if (blk != tempBlock) {
1581 tags->invalidate(blk);
1582 } else {
1583 tempBlock->invalidate();
1584 }
1585 }
1586
1587 void
1588 BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks)
1589 {
1590 PacketPtr pkt = evictBlock(blk);
1591 if (pkt) {
1592 writebacks.push_back(pkt);
1593 }
1594 }
1595
1596 PacketPtr
1597 BaseCache::writebackBlk(CacheBlk *blk)
1598 {
1599 chatty_assert(!isReadOnly || writebackClean,
1600 "Writeback from read-only cache");
1601 assert(blk && blk->isValid() &&
1602 (blk->isSet(CacheBlk::DirtyBit) || writebackClean));
1603
1604 stats.writebacks[Request::wbRequestorId]++;
1605
1606 RequestPtr req = std::make_shared<Request>(
1607 regenerateBlkAddr(blk), blkSize, 0, Request::wbRequestorId);
1608
1609 if (blk->isSecure())
1610 req->setFlags(Request::SECURE);
1611
1612 req->taskId(blk->getTaskId());
1613
1614 PacketPtr pkt =
1615 new Packet(req, blk->isSet(CacheBlk::DirtyBit) ?
1616 MemCmd::WritebackDirty : MemCmd::WritebackClean);
1617
1618 DPRINTF(Cache, "Create Writeback %s writable: %d, dirty: %d\n",
1619 pkt->print(), blk->isSet(CacheBlk::WritableBit),
1620 blk->isSet(CacheBlk::DirtyBit));
1621
1622 if (blk->isSet(CacheBlk::WritableBit)) {
1623 // not asserting shared means we pass the block in modified
1624 // state, mark our own block non-writeable
1625 blk->clearCoherenceBits(CacheBlk::WritableBit);
1626 } else {
1627 // we are in the Owned state, tell the receiver
1628 pkt->setHasSharers();
1629 }
1630
1631 // make sure the block is not marked dirty
1632 blk->clearCoherenceBits(CacheBlk::DirtyBit);
1633
1634 pkt->allocate();
1635 pkt->setDataFromBlock(blk->data, blkSize);
1636
1637 // When a block is compressed, it must first be decompressed before being
1638 // sent for writeback.
1639 if (compressor) {
1640 pkt->payloadDelay = compressor->getDecompressionLatency(blk);
1641 }
1642
1643 return pkt;
1644 }
1645
1646 PacketPtr
1647 BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id)
1648 {
1649 RequestPtr req = std::make_shared<Request>(
1650 regenerateBlkAddr(blk), blkSize, 0, Request::wbRequestorId);
1651
1652 if (blk->isSecure()) {
1653 req->setFlags(Request::SECURE);
1654 }
1655 req->taskId(blk->getTaskId());
1656
1657 PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id);
1658
1659 if (dest) {
1660 req->setFlags(dest);
1661 pkt->setWriteThrough();
1662 }
1663
1664 DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(),
1665 blk->isSet(CacheBlk::WritableBit), blk->isSet(CacheBlk::DirtyBit));
1666
1667 if (blk->isSet(CacheBlk::WritableBit)) {
1668 // not asserting shared means we pass the block in modified
1669 // state, mark our own block non-writeable
1670 blk->clearCoherenceBits(CacheBlk::WritableBit);
1671 } else {
1672 // we are in the Owned state, tell the receiver
1673 pkt->setHasSharers();
1674 }
1675
1676 // make sure the block is not marked dirty
1677 blk->clearCoherenceBits(CacheBlk::DirtyBit);
1678
1679 pkt->allocate();
1680 pkt->setDataFromBlock(blk->data, blkSize);
1681
1682 // When a block is compressed, it must first be decompressed before being
1683 // sent for writeback.
1684 if (compressor) {
1685 pkt->payloadDelay = compressor->getDecompressionLatency(blk);
1686 }
1687
1688 return pkt;
1689 }
1690
1691
1692 void
1693 BaseCache::memWriteback()
1694 {
1695 tags->forEachBlk([this](CacheBlk &blk) { writebackVisitor(blk); });
1696 }
1697
1698 void
1699 BaseCache::memInvalidate()
1700 {
1701 tags->forEachBlk([this](CacheBlk &blk) { invalidateVisitor(blk); });
1702 }
1703
1704 bool
1705 BaseCache::isDirty() const
1706 {
1707 return tags->anyBlk([](CacheBlk &blk) {
1708 return blk.isSet(CacheBlk::DirtyBit); });
1709 }
1710
1711 bool
1712 BaseCache::coalesce() const
1713 {
1714 return writeAllocator && writeAllocator->coalesce();
1715 }
1716
1717 void
1718 BaseCache::writebackVisitor(CacheBlk &blk)
1719 {
1720 if (blk.isSet(CacheBlk::DirtyBit)) {
1721 assert(blk.isValid());
1722
1723 RequestPtr request = std::make_shared<Request>(
1724 regenerateBlkAddr(&blk), blkSize, 0, Request::funcRequestorId);
1725
1726 request->taskId(blk.getTaskId());
1727 if (blk.isSecure()) {
1728 request->setFlags(Request::SECURE);
1729 }
1730
1731 Packet packet(request, MemCmd::WriteReq);
1732 packet.dataStatic(blk.data);
1733
1734 memSidePort.sendFunctional(&packet);
1735
1736 blk.clearCoherenceBits(CacheBlk::DirtyBit);
1737 }
1738 }
1739
1740 void
1741 BaseCache::invalidateVisitor(CacheBlk &blk)
1742 {
1743 if (blk.isSet(CacheBlk::DirtyBit))
1744 warn_once("Invalidating dirty cache lines. " \
1745 "Expect things to break.\n");
1746
1747 if (blk.isValid()) {
1748 assert(!blk.isSet(CacheBlk::DirtyBit));
1749 invalidateBlock(&blk);
1750 }
1751 }
1752
1753 Tick
1754 BaseCache::nextQueueReadyTime() const
1755 {
1756 Tick nextReady = std::min(mshrQueue.nextReadyTime(),
1757 writeBuffer.nextReadyTime());
1758
1759 // Don't signal prefetch ready time if no MSHRs available
1760 // Will signal once enough MSHRs are deallocated
1761 if (prefetcher && mshrQueue.canPrefetch() && !isBlocked()) {
1762 nextReady = std::min(nextReady,
1763 prefetcher->nextPrefetchReadyTime());
1764 }
1765
1766 return nextReady;
1767 }
1768
1769
1770 bool
1771 BaseCache::sendMSHRQueuePacket(MSHR* mshr)
1772 {
1773 assert(mshr);
1774
1775 // use request from 1st target
1776 PacketPtr tgt_pkt = mshr->getTarget()->pkt;
1777
1778 DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());
1779
1780 // if the cache is in write coalescing mode or (additionally) in
1781 // no allocation mode, and we have a write packet with an MSHR
1782 // that is not a whole-line write (due to incompatible flags etc),
1783 // then reset the write mode
1784 if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) {
1785 if (!mshr->isWholeLineWrite()) {
1786 // if we are currently write coalescing, hold on to the
1787 // MSHR for as many extra cycles as we need to completely
1788 // write a cache line
1789 if (writeAllocator->delay(mshr->blkAddr)) {
1790 Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod();
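// Worked example (hypothetical values): filling a 64-byte line with
// 8-byte writes and a 500-tick clock period gives
// delay = 64 / 8 * 500 = 4000 ticks, i.e. the MSHR is held for eight
// extra clock periods.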
1791 DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow "
1792 "for write coalescing\n", tgt_pkt->print(), delay);
1793 mshrQueue.delay(mshr, delay);
1794 return false;
1795 } else {
1796 writeAllocator->reset();
1797 }
1798 } else {
1799 writeAllocator->resetDelay(mshr->blkAddr);
1800 }
1801 }
1802
1803 CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
1804
1805 // either a prefetch that is not present upstream, or a normal
1806 // MSHR request, proceed to get the packet to send downstream
1807 PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(),
1808 mshr->isWholeLineWrite());
1809
1810 mshr->isForward = (pkt == nullptr);
1811
1812 if (mshr->isForward) {
1813 // not a cache block request, but a response is expected;
1814 // make a copy of the current packet to forward, and keep
1815 // the current copy for response handling
1816 pkt = new Packet(tgt_pkt, false, true);
1817 assert(!pkt->isWrite());
1818 }
1819
1820 // play it safe and append (rather than set) the sender state,
1821 // as forwarded packets may already have existing state
1822 pkt->pushSenderState(mshr);
1823
1824 if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) {
1825 // A cache clean operation is looking for a dirty block. Mark
1826 // the packet so that the destination xbar can determine that
1827 // there will be a follow-up write packet as well.
1828 pkt->setSatisfied();
1829 }
1830
1831 if (!memSidePort.sendTimingReq(pkt)) {
1832 // we are awaiting a retry; delete the packet here and
1833 // create a new one when the retry gives us the
1834 // opportunity to resend
1835 delete pkt;
1836
1837 // note that we have now masked any requestBus and
1838 // schedSendEvent (we will wait for a retry before
1839 // doing anything); this holds even if we do not
1840 // care about this packet and might override it before
1841 // it gets retried
1842 return true;
1843 } else {
1844 // As part of the call to sendTimingReq the packet is
1845 // forwarded to all neighbouring caches (and any caches
1846 // above them) as a snoop. Thus at this point we know if
1847 // any of the neighbouring caches are responding, and if
1848 // so, we know it is dirty, and we can determine if it is
1849 // being passed as Modified, making our MSHR the ordering
1850 // point
1851 bool pending_modified_resp = !pkt->hasSharers() &&
1852 pkt->cacheResponding();
1853 markInService(mshr, pending_modified_resp);
1854
1855 if (pkt->isClean() && blk && blk->isSet(CacheBlk::DirtyBit)) {
1856 // A cache clean operation is looking for a dirty
1857 // block. If a dirty block is encountered, a WriteClean
1858 // will update any copies along the path to memory
1859 // down to the point of reference.
1860 DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
1861 __func__, pkt->print(), blk->print());
1862 PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
1863 pkt->id);
1864 PacketList writebacks;
1865 writebacks.push_back(wb_pkt);
1866 doWritebacks(writebacks, 0);
1867 }
1868
1869 return false;
1870 }
1871 }
1872
1873 bool
1874 BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry)
1875 {
1876 assert(wq_entry);
1877
1878 // always a single target for write queue entries
1879 PacketPtr tgt_pkt = wq_entry->getTarget()->pkt;
1880
1881 DPRINTF(Cache, "%s: write %s\n", __func__, tgt_pkt->print());
1882
1883 // forward as is, both for evictions and uncacheable writes
1884 if (!memSidePort.sendTimingReq(tgt_pkt)) {
1885 // note that we have now masked any requestBus and
1886 // schedSendEvent (we will wait for a retry before
1887 // doing anything); this holds even if we do not
1888 // care about this packet and might override it before
1889 // it gets retried
1890 return true;
1891 } else {
1892 markInService(wq_entry);
1893 return false;
1894 }
1895 }
1896
1897 void
1898 BaseCache::serialize(CheckpointOut &cp) const
1899 {
1900 bool dirty(isDirty());
1901
1902 if (dirty) {
1903 warn("*** The cache still contains dirty data. ***\n");
1904 warn(" Make sure to drain the system using the correct flags.\n");
1905 warn(" This checkpoint will not restore correctly " \
1906 "and dirty data in the cache will be lost!\n");
1907 }
1908
1909 // Since we don't checkpoint the data in the cache, any dirty data
1910 // will be lost when restoring from a checkpoint of a system that
1911 // wasn't drained properly. Flag the checkpoint as invalid if the
1912 // cache contains dirty data.
1913 bool bad_checkpoint(dirty);
1914 SERIALIZE_SCALAR(bad_checkpoint);
1915 }
1916
1917 void
1918 BaseCache::unserialize(CheckpointIn &cp)
1919 {
1920 bool bad_checkpoint;
1921 UNSERIALIZE_SCALAR(bad_checkpoint);
1922 if (bad_checkpoint) {
1923 fatal("Restoring from checkpoints with dirty caches is not "
1924 "supported in the classic memory system. Please remove any "
1925 "caches or drain them properly before taking checkpoints.\n");
1926 }
1927 }
1928
1929
1930 BaseCache::CacheCmdStats::CacheCmdStats(BaseCache &c,
1931 const std::string &name)
1932 : Stats::Group(&c, name.c_str()), cache(c),
1933 ADD_STAT(hits, ("number of " + name + " hits").c_str()),
1934 ADD_STAT(misses, ("number of " + name + " misses").c_str()),
1935 ADD_STAT(missLatency, ("number of " + name + " miss ticks").c_str()),
1936 ADD_STAT(accesses,
1937 ("number of " + name + " accesses(hits+misses)").c_str()),
1938 ADD_STAT(missRate, ("miss rate for " + name + " accesses").c_str()),
1939 ADD_STAT(avgMissLatency, ("average " + name + " miss latency").c_str()),
1940 ADD_STAT(mshrHits, ("number of " + name + " MSHR hits").c_str()),
1941 ADD_STAT(mshrMisses, ("number of " + name + " MSHR misses").c_str()),
1942 ADD_STAT(mshrUncacheable,
1943 ("number of " + name + " MSHR uncacheable").c_str()),
1944 ADD_STAT(mshrMissLatency,
1945 ("number of " + name + " MSHR miss ticks").c_str()),
1946 ADD_STAT(mshrUncacheableLatency,
1947 ("number of " + name + " MSHR uncacheable ticks").c_str()),
1948 ADD_STAT(mshrMissRate,
1949 ("mshr miss rate for " + name + " accesses").c_str()),
1950 ADD_STAT(avgMshrMissLatency,
1951 ("average " + name + " mshr miss latency").c_str()),
1952 ADD_STAT(avgMshrUncacheableLatency,
1953 ("average " + name + " mshr uncacheable latency").c_str())
1954 {
1955 }
1956
1957 void
1958 BaseCache::CacheCmdStats::regStatsFromParent()
1959 {
1960 using namespace Stats;
1961
1962 Stats::Group::regStats();
1963 System *system = cache.system;
1964 const auto max_requestors = system->maxRequestors();
1965
1966 hits
1967 .init(max_requestors)
1968 .flags(total | nozero | nonan)
1969 ;
1970 for (int i = 0; i < max_requestors; i++) {
1971 hits.subname(i, system->getRequestorName(i));
1972 }
1973
1974 // Miss statistics
1975 misses
1976 .init(max_requestors)
1977 .flags(total | nozero | nonan)
1978 ;
1979 for (int i = 0; i < max_requestors; i++) {
1980 misses.subname(i, system->getRequestorName(i));
1981 }
1982
1983 // Miss latency statistics
1984 missLatency
1985 .init(max_requestors)
1986 .flags(total | nozero | nonan)
1987 ;
1988 for (int i = 0; i < max_requestors; i++) {
1989 missLatency.subname(i, system->getRequestorName(i));
1990 }
1991
1992 // access formulas
1993 accesses.flags(total | nozero | nonan);
1994 accesses = hits + misses;
1995 for (int i = 0; i < max_requestors; i++) {
1996 accesses.subname(i, system->getRequestorName(i));
1997 }
1998
1999 // miss rate formulas
2000 missRate.flags(total | nozero | nonan);
2001 missRate = misses / accesses;
2002 for (int i = 0; i < max_requestors; i++) {
2003 missRate.subname(i, system->getRequestorName(i));
2004 }
2005
2006 // miss latency formulas
2007 avgMissLatency.flags(total | nozero | nonan);
2008 avgMissLatency = missLatency / misses;
2009 for (int i = 0; i < max_requestors; i++) {
2010 avgMissLatency.subname(i, system->getRequestorName(i));
2011 }
2012
2013 // MSHR statistics
2014 // MSHR hit statistics
2015 mshrHits
2016 .init(max_requestors)
2017 .flags(total | nozero | nonan)
2018 ;
2019 for (int i = 0; i < max_requestors; i++) {
2020 mshrHits.subname(i, system->getRequestorName(i));
2021 }
2022
2023 // MSHR miss statistics
2024 mshrMisses
2025 .init(max_requestors)
2026 .flags(total | nozero | nonan)
2027 ;
2028 for (int i = 0; i < max_requestors; i++) {
2029 mshrMisses.subname(i, system->getRequestorName(i));
2030 }
2031
2032 // MSHR miss latency statistics
2033 mshrMissLatency
2034 .init(max_requestors)
2035 .flags(total | nozero | nonan)
2036 ;
2037 for (int i = 0; i < max_requestors; i++) {
2038 mshrMissLatency.subname(i, system->getRequestorName(i));
2039 }
2040
2041 // MSHR uncacheable statistics
2042 mshrUncacheable
2043 .init(max_requestors)
2044 .flags(total | nozero | nonan)
2045 ;
2046 for (int i = 0; i < max_requestors; i++) {
2047 mshrUncacheable.subname(i, system->getRequestorName(i));
2048 }
2049
2050 // MSHR miss latency statistics
2051 mshrUncacheableLatency
2052 .init(max_requestors)
2053 .flags(total | nozero | nonan)
2054 ;
2055 for (int i = 0; i < max_requestors; i++) {
2056 mshrUncacheableLatency.subname(i, system->getRequestorName(i));
2057 }
2058
2059 // MSHR miss rate formulas
2060 mshrMissRate.flags(total | nozero | nonan);
2061 mshrMissRate = mshrMisses / accesses;
2062
2063 for (int i = 0; i < max_requestors; i++) {
2064 mshrMissRate.subname(i, system->getRequestorName(i));
2065 }
2066
2067 // mshrMiss latency formulas
2068 avgMshrMissLatency.flags(total | nozero | nonan);
2069 avgMshrMissLatency = mshrMissLatency / mshrMisses;
2070 for (int i = 0; i < max_requestors; i++) {
2071 avgMshrMissLatency.subname(i, system->getRequestorName(i));
2072 }
2073
2074 // mshrUncacheable latency formulas
2075 avgMshrUncacheableLatency.flags(total | nozero | nonan);
2076 avgMshrUncacheableLatency = mshrUncacheableLatency / mshrUncacheable;
2077 for (int i = 0; i < max_requestors; i++) {
2078 avgMshrUncacheableLatency.subname(i, system->getRequestorName(i));
2079 }
2080 }
2081
2082 BaseCache::CacheStats::CacheStats(BaseCache &c)
2083 : Stats::Group(&c), cache(c),
2084
2085 ADD_STAT(demandHits, "number of demand (read+write) hits"),
2086 ADD_STAT(overallHits, "number of overall hits"),
2087 ADD_STAT(demandMisses, "number of demand (read+write) misses"),
2088 ADD_STAT(overallMisses, "number of overall misses"),
2089 ADD_STAT(demandMissLatency, "number of demand (read+write) miss ticks"),
2090 ADD_STAT(overallMissLatency, "number of overall miss ticks"),
2091 ADD_STAT(demandAccesses, "number of demand (read+write) accesses"),
2092 ADD_STAT(overallAccesses, "number of overall accesses"),
2093 ADD_STAT(demandMissRate, "miss rate for demand accesses"),
2094 ADD_STAT(overallMissRate, "miss rate for overall accesses"),
2095 ADD_STAT(demandAvgMissLatency, "average demand miss latency"),
2096 ADD_STAT(overallAvgMissLatency, "average overall miss latency"),
2097 ADD_STAT(blockedCycles, "number of cycles access was blocked"),
2098 ADD_STAT(blockedCauses, "number of times access was blocked"),
2099 ADD_STAT(avgBlocked, "average number of cycles each access was blocked"),
2100 ADD_STAT(unusedPrefetches,
2101 "number of HardPF blocks evicted w/o reference"),
2102 ADD_STAT(writebacks, "number of writebacks"),
2103 ADD_STAT(demandMshrHits, "number of demand (read+write) MSHR hits"),
2104 ADD_STAT(overallMshrHits, "number of overall MSHR hits"),
2105 ADD_STAT(demandMshrMisses, "number of demand (read+write) MSHR misses"),
2106 ADD_STAT(overallMshrMisses, "number of overall MSHR misses"),
2107 ADD_STAT(overallMshrUncacheable,
2108 "number of overall MSHR uncacheable misses"),
2109 ADD_STAT(demandMshrMissLatency,
2110 "number of demand (read+write) MSHR miss ticks"),
2111 ADD_STAT(overallMshrMissLatency, "number of overall MSHR miss ticks"),
2112 ADD_STAT(overallMshrUncacheableLatency,
2113 "number of overall MSHR uncacheable ticks"),
2114 ADD_STAT(demandMshrMissRate, "mshr miss rate for demand accesses"),
2115 ADD_STAT(overallMshrMissRate, "mshr miss rate for overall accesses"),
2116 ADD_STAT(demandAvgMshrMissLatency, "average demand mshr miss latency"),
2117 ADD_STAT(overallAvgMshrMissLatency, "average overall mshr miss latency"),
2118 ADD_STAT(overallAvgMshrUncacheableLatency,
2119 "average overall mshr uncacheable latency"),
2120 ADD_STAT(replacements, "number of replacements"),
2121 ADD_STAT(dataExpansions, "number of data expansions"),
2122 ADD_STAT(dataContractions, "number of data contractions"),
2123 cmd(MemCmd::NUM_MEM_CMDS)
2124 {
2125 for (int idx = 0; idx < MemCmd::NUM_MEM_CMDS; ++idx)
2126 cmd[idx].reset(new CacheCmdStats(c, MemCmd(idx).toString()));
2127 }
2128
2129 void
2130 BaseCache::CacheStats::regStats()
2131 {
2132 using namespace Stats;
2133
2134 Stats::Group::regStats();
2135
2136 System *system = cache.system;
2137 const auto max_requestors = system->maxRequestors();
2138
2139 for (auto &cs : cmd)
2140 cs->regStatsFromParent();
2141
2142 // These macros make it easier to sum the right subset of commands and
2143 // to change the subset of commands that are considered "demand" vs
2144 // "non-demand"
2145 #define SUM_DEMAND(s) \
2146 (cmd[MemCmd::ReadReq]->s + cmd[MemCmd::WriteReq]->s + \
2147 cmd[MemCmd::WriteLineReq]->s + cmd[MemCmd::ReadExReq]->s + \
2148 cmd[MemCmd::ReadCleanReq]->s + cmd[MemCmd::ReadSharedReq]->s)
2149
2150 // should writebacks be included here? prior code was inconsistent...
2151 #define SUM_NON_DEMAND(s) \
2152 (cmd[MemCmd::SoftPFReq]->s + cmd[MemCmd::HardPFReq]->s + \
2153 cmd[MemCmd::SoftPFExReq]->s)
2154
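// As a concrete reading of the macros: "demandHits = SUM_DEMAND(hits)"
// below sums the per-requestor hit vectors of ReadReq, WriteReq,
// WriteLineReq, ReadExReq, ReadCleanReq and ReadSharedReq, while the
// "overall" formulas add the prefetch commands from SUM_NON_DEMAND on top.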
2155 demandHits.flags(total | nozero | nonan);
2156 demandHits = SUM_DEMAND(hits);
2157 for (int i = 0; i < max_requestors; i++) {
2158 demandHits.subname(i, system->getRequestorName(i));
2159 }
2160
2161 overallHits.flags(total | nozero | nonan);
2162 overallHits = demandHits + SUM_NON_DEMAND(hits);
2163 for (int i = 0; i < max_requestors; i++) {
2164 overallHits.subname(i, system->getRequestorName(i));
2165 }
2166
2167 demandMisses.flags(total | nozero | nonan);
2168 demandMisses = SUM_DEMAND(misses);
2169 for (int i = 0; i < max_requestors; i++) {
2170 demandMisses.subname(i, system->getRequestorName(i));
2171 }
2172
2173 overallMisses.flags(total | nozero | nonan);
2174 overallMisses = demandMisses + SUM_NON_DEMAND(misses);
2175 for (int i = 0; i < max_requestors; i++) {
2176 overallMisses.subname(i, system->getRequestorName(i));
2177 }
2178
2179 demandMissLatency.flags(total | nozero | nonan);
2180 demandMissLatency = SUM_DEMAND(missLatency);
2181 for (int i = 0; i < max_requestors; i++) {
2182 demandMissLatency.subname(i, system->getRequestorName(i));
2183 }
2184
2185 overallMissLatency.flags(total | nozero | nonan);
2186 overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
2187 for (int i = 0; i < max_requestors; i++) {
2188 overallMissLatency.subname(i, system->getRequestorName(i));
2189 }
2190
2191 demandAccesses.flags(total | nozero | nonan);
2192 demandAccesses = demandHits + demandMisses;
2193 for (int i = 0; i < max_requestors; i++) {
2194 demandAccesses.subname(i, system->getRequestorName(i));
2195 }
2196
2197 overallAccesses.flags(total | nozero | nonan);
2198 overallAccesses = overallHits + overallMisses;
2199 for (int i = 0; i < max_requestors; i++) {
2200 overallAccesses.subname(i, system->getRequestorName(i));
2201 }
2202
2203 demandMissRate.flags(total | nozero | nonan);
2204 demandMissRate = demandMisses / demandAccesses;
2205 for (int i = 0; i < max_requestors; i++) {
2206 demandMissRate.subname(i, system->getRequestorName(i));
2207 }
2208
2209 overallMissRate.flags(total | nozero | nonan);
2210 overallMissRate = overallMisses / overallAccesses;
2211 for (int i = 0; i < max_requestors; i++) {
2212 overallMissRate.subname(i, system->getRequestorName(i));
2213 }
2214
2215 demandAvgMissLatency.flags(total | nozero | nonan);
2216 demandAvgMissLatency = demandMissLatency / demandMisses;
2217 for (int i = 0; i < max_requestors; i++) {
2218 demandAvgMissLatency.subname(i, system->getRequestorName(i));
2219 }
2220
2221 overallAvgMissLatency.flags(total | nozero | nonan);
2222 overallAvgMissLatency = overallMissLatency / overallMisses;
2223 for (int i = 0; i < max_requestors; i++) {
2224 overallAvgMissLatency.subname(i, system->getRequestorName(i));
2225 }
2226
2227 blockedCycles.init(NUM_BLOCKED_CAUSES);
2228 blockedCycles
2229 .subname(Blocked_NoMSHRs, "no_mshrs")
2230 .subname(Blocked_NoTargets, "no_targets")
2231 ;
2232
2233
2234 blockedCauses.init(NUM_BLOCKED_CAUSES);
2235 blockedCauses
2236 .subname(Blocked_NoMSHRs, "no_mshrs")
2237 .subname(Blocked_NoTargets, "no_targets")
2238 ;
2239
2240 avgBlocked
2241 .subname(Blocked_NoMSHRs, "no_mshrs")
2242 .subname(Blocked_NoTargets, "no_targets")
2243 ;
2244 avgBlocked = blockedCycles / blockedCauses;
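// For example (hypothetical numbers): if accesses were blocked 10 times
// for a total of 50 cycles, avgBlocked reports an average of 5 cycles
// per blocking event.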
2245
2246 unusedPrefetches.flags(nozero);
2247
2248 writebacks
2249 .init(max_requestors)
2250 .flags(total | nozero | nonan)
2251 ;
2252 for (int i = 0; i < max_requestors; i++) {
2253 writebacks.subname(i, system->getRequestorName(i));
2254 }
2255
2256 demandMshrHits.flags(total | nozero | nonan);
2257 demandMshrHits = SUM_DEMAND(mshrHits);
2258 for (int i = 0; i < max_requestors; i++) {
2259 demandMshrHits.subname(i, system->getRequestorName(i));
2260 }
2261
2262 overallMshrHits.flags(total | nozero | nonan);
2263 overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshrHits);
2264 for (int i = 0; i < max_requestors; i++) {
2265 overallMshrHits.subname(i, system->getRequestorName(i));
2266 }
2267
2268 demandMshrMisses.flags(total | nozero | nonan);
2269 demandMshrMisses = SUM_DEMAND(mshrMisses);
2270 for (int i = 0; i < max_requestors; i++) {
2271 demandMshrMisses.subname(i, system->getRequestorName(i));
2272 }
2273
2274 overallMshrMisses.flags(total | nozero | nonan);
2275 overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshrMisses);
2276 for (int i = 0; i < max_requestors; i++) {
2277 overallMshrMisses.subname(i, system->getRequestorName(i));
2278 }
2279
2280 demandMshrMissLatency.flags(total | nozero | nonan);
2281 demandMshrMissLatency = SUM_DEMAND(mshrMissLatency);
2282 for (int i = 0; i < max_requestors; i++) {
2283 demandMshrMissLatency.subname(i, system->getRequestorName(i));
2284 }
2285
2286 overallMshrMissLatency.flags(total | nozero | nonan);
2287 overallMshrMissLatency =
2288 demandMshrMissLatency + SUM_NON_DEMAND(mshrMissLatency);
2289 for (int i = 0; i < max_requestors; i++) {
2290 overallMshrMissLatency.subname(i, system->getRequestorName(i));
2291 }
2292
2293 overallMshrUncacheable.flags(total | nozero | nonan);
2294 overallMshrUncacheable =
2295 SUM_DEMAND(mshrUncacheable) + SUM_NON_DEMAND(mshrUncacheable);
2296 for (int i = 0; i < max_requestors; i++) {
2297 overallMshrUncacheable.subname(i, system->getRequestorName(i));
2298 }
2299
2300
2301 overallMshrUncacheableLatency.flags(total | nozero | nonan);
2302 overallMshrUncacheableLatency =
2303 SUM_DEMAND(mshrUncacheableLatency) +
2304 SUM_NON_DEMAND(mshrUncacheableLatency);
2305 for (int i = 0; i < max_requestors; i++) {
2306 overallMshrUncacheableLatency.subname(i, system->getRequestorName(i));
2307 }
2308
2309 demandMshrMissRate.flags(total | nozero | nonan);
2310 demandMshrMissRate = demandMshrMisses / demandAccesses;
2311 for (int i = 0; i < max_requestors; i++) {
2312 demandMshrMissRate.subname(i, system->getRequestorName(i));
2313 }
2314
2315 overallMshrMissRate.flags(total | nozero | nonan);
2316 overallMshrMissRate = overallMshrMisses / overallAccesses;
2317 for (int i = 0; i < max_requestors; i++) {
2318 overallMshrMissRate.subname(i, system->getRequestorName(i));
2319 }
2320
2321 demandAvgMshrMissLatency.flags(total | nozero | nonan);
2322 demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
2323 for (int i = 0; i < max_requestors; i++) {
2324 demandAvgMshrMissLatency.subname(i, system->getRequestorName(i));
2325 }
2326
2327 overallAvgMshrMissLatency.flags(total | nozero | nonan);
2328 overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
2329 for (int i = 0; i < max_requestors; i++) {
2330 overallAvgMshrMissLatency.subname(i, system->getRequestorName(i));
2331 }
2332
2333 overallAvgMshrUncacheableLatency.flags(total | nozero | nonan);
2334 overallAvgMshrUncacheableLatency =
2335 overallMshrUncacheableLatency / overallMshrUncacheable;
2336 for (int i = 0; i < max_requestors; i++) {
2337 overallAvgMshrUncacheableLatency.subname(i,
2338 system->getRequestorName(i));
2339 }
2340
2341 dataExpansions.flags(nozero | nonan);
2342 dataContractions.flags(nozero | nonan);
2343 }
2344
2345 void
2346 BaseCache::regProbePoints()
2347 {
2348 ppHit = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Hit");
2349 ppMiss = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Miss");
2350 ppFill = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Fill");
2351 ppDataUpdate =
2352 new ProbePointArg<DataUpdate>(this->getProbeManager(), "Data Update");
2353 }
2354
2355 ///////////////
2356 //
2357 // CpuSidePort
2358 //
2359 ///////////////
2360 bool
2361 BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
2362 {
2363 // Snoops shouldn't happen when bypassing caches
2364 assert(!cache->system->bypassCaches());
2365
2366 assert(pkt->isResponse());
2367
2368 // Express snoop responses from requestor to responder, e.g., from L1 to L2
2369 cache->recvTimingSnoopResp(pkt);
2370 return true;
2371 }
2372
2373
2374 bool
2375 BaseCache::CpuSidePort::tryTiming(PacketPtr pkt)
2376 {
2377 if (cache->system->bypassCaches() || pkt->isExpressSnoop()) {
2378 // always let express snoop packets through even if blocked
2379 return true;
2380 } else if (blocked || mustSendRetry) {
2381 // either already committed to send a retry, or blocked
2382 mustSendRetry = true;
2383 return false;
2384 }
2385 mustSendRetry = false;
2386 return true;
2387 }
2388
2389 bool
2390 BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
2391 {
2392 assert(pkt->isRequest());
2393
2394 if (cache->system->bypassCaches()) {
2395 // Just forward the packet if caches are disabled.
2396 // @todo This should really enqueue the packet rather
2397 M5_VAR_USED bool success = cache->memSidePort.sendTimingReq(pkt);
2398 assert(success);
2399 return true;
2400 } else if (tryTiming(pkt)) {
2401 cache->recvTimingReq(pkt);
2402 return true;
2403 }
2404 return false;
2405 }
2406
2407 Tick
2408 BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt)
2409 {
2410 if (cache->system->bypassCaches()) {
2411 // Forward the request if the system is in cache bypass mode.
2412 return cache->memSidePort.sendAtomic(pkt);
2413 } else {
2414 return cache->recvAtomic(pkt);
2415 }
2416 }
2417
2418 void
2419 BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt)
2420 {
2421 if (cache->system->bypassCaches()) {
2422 // The cache should be flushed if we are in cache bypass mode,
2423 // so we don't need to check if we need to update anything.
2424 cache->memSidePort.sendFunctional(pkt);
2425 return;
2426 }
2427
2428 // functional request
2429 cache->functionalAccess(pkt, true);
2430 }
2431
2432 AddrRangeList
2433 BaseCache::CpuSidePort::getAddrRanges() const
2434 {
2435 return cache->getAddrRanges();
2436 }
2437
2438
2439 BaseCache::
2440 CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache,
2441 const std::string &_label)
2442 : CacheResponsePort(_name, _cache, _label), cache(_cache)
2443 {
2444 }
2445
2446 ///////////////
2447 //
2448 // MemSidePort
2449 //
2450 ///////////////
2451 bool
2452 BaseCache::MemSidePort::recvTimingResp(PacketPtr pkt)
2453 {
2454 cache->recvTimingResp(pkt);
2455 return true;
2456 }
2457
2458 // Express snooping requests to memside port
2459 void
2460 BaseCache::MemSidePort::recvTimingSnoopReq(PacketPtr pkt)
2461 {
2462 // Snoops shouldn't happen when bypassing caches
2463 assert(!cache->system->bypassCaches());
2464
2465 // handle snooping requests
2466 cache->recvTimingSnoopReq(pkt);
2467 }
2468
2469 Tick
2470 BaseCache::MemSidePort::recvAtomicSnoop(PacketPtr pkt)
2471 {
2472 // Snoops shouldn't happen when bypassing caches
2473 assert(!cache->system->bypassCaches());
2474
2475 return cache->recvAtomicSnoop(pkt);
2476 }
2477
2478 void
2479 BaseCache::MemSidePort::recvFunctionalSnoop(PacketPtr pkt)
2480 {
2481 // Snoops shouldn't happen when bypassing caches
2482 assert(!cache->system->bypassCaches());
2483
2484 // functional snoop (note that in contrast to atomic we don't have
2485 // a specific functionalSnoop method, as they have the same
2486 // behaviour regardless)
2487 cache->functionalAccess(pkt, false);
2488 }
2489
2490 void
2491 BaseCache::CacheReqPacketQueue::sendDeferredPacket()
2492 {
2493 // sanity check
2494 assert(!waitingOnRetry);
2495
2496 // there should never be any deferred request packets in the
2497 // queue; instead we rely on the cache to provide the packets
2498 // from the MSHR queue or write queue
2499 assert(deferredPacketReadyTime() == MaxTick);
2500
2501 // check for request packets (requests & writebacks)
2502 QueueEntry* entry = cache.getNextQueueEntry();
2503
2504 if (!entry) {
2505 // can happen if e.g. we attempt a writeback and fail, but
2506 // before the retry, the writeback is eliminated because
2507 // we snoop another cache's ReadEx.
2508 } else {
2509 // let our snoop responses go first if there are responses to
2510 // the same addresses
2511 if (checkConflictingSnoop(entry->getTarget()->pkt)) {
2512 return;
2513 }
2514 waitingOnRetry = entry->sendPacket(cache);
2515 }
2516
2517 // if we succeeded and are not waiting for a retry, schedule the
2518 // next send considering when the next queue is ready, note that
2519 // snoop responses have their own packet queue and thus schedule
2520 // their own events
2521 if (!waitingOnRetry) {
2522 schedSendEvent(cache.nextQueueReadyTime());
2523 }
2524 }
2525
2526 BaseCache::MemSidePort::MemSidePort(const std::string &_name,
2527 BaseCache *_cache,
2528 const std::string &_label)
2529 : CacheRequestPort(_name, _cache, _reqQueue, _snoopRespQueue),
2530 _reqQueue(*_cache, *this, _snoopRespQueue, _label),
2531 _snoopRespQueue(*_cache, *this, true, _label), cache(_cache)
2532 {
2533 }
2534
2535 void
2536 WriteAllocator::updateMode(Addr write_addr, unsigned write_size,
2537 Addr blk_addr)
2538 {
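// Sketch of the intended transitions (thresholds are configuration
// dependent): sequential writes accumulate in byteCount; once it exceeds
// coalesceLimit the allocator moves from ALLOCATE to COALESCE, and once
// it exceeds noAllocateLimit it moves on to NO_ALLOCATE. Any
// non-sequential write restarts the count and returns to ALLOCATE.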
2539 // check if we are continuing where the last write ended
2540 if (nextAddr == write_addr) {
2541 delayCtr[blk_addr] = delayThreshold;
2542 // stop if we have already saturated
2543 if (mode != WriteMode::NO_ALLOCATE) {
2544 byteCount += write_size;
2545 // switch to write coalescing mode if we have passed
2546 // the lower threshold
2547 if (mode == WriteMode::ALLOCATE &&
2548 byteCount > coalesceLimit) {
2549 mode = WriteMode::COALESCE;
2550 DPRINTF(Cache, "Switched to write coalescing\n");
2551 } else if (mode == WriteMode::COALESCE &&
2552 byteCount > noAllocateLimit) {
2553 // and continue and switch to non-allocating mode if we
2554 // pass the upper threshold
2555 mode = WriteMode::NO_ALLOCATE;
2556 DPRINTF(Cache, "Switched to write-no-allocate\n");
2557 }
2558 }
2559 } else {
2560 // we did not see a write matching the previous one, start
2561 // over again
2562 byteCount = write_size;
2563 mode = WriteMode::ALLOCATE;
2564 resetDelay(blk_addr);
2565 }
2566 nextAddr = write_addr + write_size;
2567 }