src/mem/cache/base.cc

   1 /*
   2  * Copyright (c) 2012-2013, 2018 ARM Limited
   3  * All rights reserved.
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2003-2005 The Regents of The University of Michigan
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Erik Hallnor
  41  *          Nikos Nikoleris
  42  */
  43
  44 /**
  45  * @file
  46  * Definition of BaseCache functions.
  47  */
  48
  49 #include "mem/cache/base.hh"
  50
  51 #include "base/compiler.hh"
  52 #include "base/logging.hh"
  53 #include "debug/Cache.hh"
  54 #include "debug/CachePort.hh"
  55 #include "debug/CacheRepl.hh"
  56 #include "debug/CacheVerbose.hh"
  57 #include "mem/cache/mshr.hh"
  58 #include "mem/cache/prefetch/base.hh"
  59 #include "mem/cache/queue_entry.hh"
  60 #include "params/BaseCache.hh"
  61 #include "params/WriteAllocator.hh"
  62 #include "sim/core.hh"
  63
  64 class BaseMasterPort;
  65 class BaseSlavePort;
  66
  67 using namespace std;
  68
  69 BaseCache::CacheSlavePort::CacheSlavePort(const std::string &_name,
  70                                           BaseCache *_cache,
  71                                           const std::string &_label)
  72     : QueuedSlavePort(_name, _cache, queue), queue(*_cache, *this, _label),
  73       blocked(false), mustSendRetry(false),
  74       sendRetryEvent([this]{ processSendRetry(); }, _name)
  75 {
  76 }
  77
  78 BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size)
  79     : MemObject(p),
  80       cpuSidePort (p->name + ".cpu_side", this, "CpuSidePort"),
  81       memSidePort(p->name + ".mem_side", this, "MemSidePort"),
  82       mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below
  83       writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below
  84       tags(p->tags),
  85       prefetcher(p->prefetcher),
  86       prefetchOnAccess(p->prefetch_on_access),
  87       writeAllocator(p->write_allocator),
  88       writebackClean(p->writeback_clean),
  89       tempBlockWriteback(nullptr),
  90       writebackTempBlockAtomicEvent([this]{ writebackTempBlockAtomic(); },
  91                                     name(), false,
  92                                     EventBase::Delayed_Writeback_Pri),
  93       blkSize(blk_size),
  94       lookupLatency(p->tag_latency),
  95       dataLatency(p->data_latency),
  96       forwardLatency(p->tag_latency),
  97       fillLatency(p->data_latency),
  98       responseLatency(p->response_latency),
  99       numTarget(p->tgts_per_mshr),
 100       forwardSnoops(true),
 101       clusivity(p->clusivity),
 102       isReadOnly(p->is_read_only),
 103       blocked(0),
 104       order(0),
 105       noTargetMSHR(nullptr),
 106       missCount(p->max_miss_count),
 107       addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()),
 108       system(p->system)
 109 {
 110     // the MSHR queue has no reserve entries as we check the MSHR
 111     // queue on every single allocation, whereas the write queue has
 112     // as many reserve entries as we have MSHRs, since every MSHR may
 113     // eventually require a writeback, and we do not check the write
 114     // buffer before committing to an MSHR
 115
 116     // forward snoops is overridden in init() once we can query
 117     // whether the connected master is actually snooping or not
 118
 119     tempBlock = new TempCacheBlk(blkSize);
 120
 121     tags->init(this);
 122     if (prefetcher)
 123         prefetcher->setCache(this);
 124 }
 125
 126 BaseCache::~BaseCache()
 127 {
 128     delete tempBlock;
 129 }
 130
 131 void
 132 BaseCache::CacheSlavePort::setBlocked()
 133 {
 134     assert(!blocked);
 135     DPRINTF(CachePort, "Port is blocking new requests\n");
 136     blocked = true;
 137     // if we already scheduled a retry in this cycle, but it has not yet
 138     // happened, cancel it
 139     if (sendRetryEvent.scheduled()) {
 140         owner.deschedule(sendRetryEvent);
 141         DPRINTF(CachePort, "Port descheduled retry\n");
 142         mustSendRetry = true;
 143     }
 144 }
 145
 146 void
 147 BaseCache::CacheSlavePort::clearBlocked()
 148 {
 149     assert(blocked);
 150     DPRINTF(CachePort, "Port is accepting new requests\n");
 151     blocked = false;
 152     if (mustSendRetry) {
 153         // @TODO: need to find a better time (next cycle?)
 154         owner.schedule(sendRetryEvent, curTick() + 1);
 155     }
 156 }
 157
 158 void
 159 BaseCache::CacheSlavePort::processSendRetry()
 160 {
 161     DPRINTF(CachePort, "Port is sending retry\n");
 162
 163     // reset the flag and call retry
 164     mustSendRetry = false;
 165     sendRetryReq();
 166 }
 167
 168 Addr
 169 BaseCache::regenerateBlkAddr(CacheBlk* blk)
 170 {
 171     if (blk != tempBlock) {
 172         return tags->regenerateBlkAddr(blk);
 173     } else {
 174         return tempBlock->getAddr();
 175     }
 176 }
 177
 178 void
 179 BaseCache::init()
 180 {
 181     if (!cpuSidePort.isConnected() || !memSidePort.isConnected())
 182         fatal("Cache ports on %s are not connected\n", name());
 183     cpuSidePort.sendRangeChange();
 184     forwardSnoops = cpuSidePort.isSnooping();
 185 }
 186
 187 BaseMasterPort &
 188 BaseCache::getMasterPort(const std::string &if_name, PortID idx)
 189 {
 190     if (if_name == "mem_side") {
 191         return memSidePort;
 192     }  else {
 193         return MemObject::getMasterPort(if_name, idx);
 194     }
 195 }
 196
 197 BaseSlavePort &
 198 BaseCache::getSlavePort(const std::string &if_name, PortID idx)
 199 {
 200     if (if_name == "cpu_side") {
 201         return cpuSidePort;
 202     } else {
 203         return MemObject::getSlavePort(if_name, idx);
 204     }
 205 }
 206
 207 bool
 208 BaseCache::inRange(Addr addr) const
 209 {
 210     for (const auto& r : addrRanges) {
 211         if (r.contains(addr)) {
 212             return true;
 213        }
 214     }
 215     return false;
 216 }
 217
 218 void
 219 BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
 220 {
 221     if (pkt->needsResponse()) {
 222         pkt->makeTimingResponse();
 223         // @todo: Make someone pay for this
 224         pkt->headerDelay = pkt->payloadDelay = 0;
 225
 226         // In this case we are considering request_time that takes
 227         // into account the delay of the xbar, if any, and just
 228         // lat, neglecting responseLatency, modelling hit latency
 229         // just as lookupLatency or or the value of lat overriden
 230         // by access(), that calls accessBlock() function.
 231         cpuSidePort.schedTimingResp(pkt, request_time, true);
 232     } else {
 233         DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
 234                 pkt->print());
 235
 236         // queue the packet for deletion, as the sending cache is
 237         // still relying on it; if the block is found in access(),
 238         // CleanEvict and Writeback messages will be deleted
 239         // here as well
 240         pendingDelete.reset(pkt);
 241     }
 242 }
 243
 244 void
 245 BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
 246                                Tick forward_time, Tick request_time)
 247 {
 248     if (writeAllocator &&
 249         pkt && pkt->isWrite() && !pkt->req->isUncacheable()) {
 250         writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(),
 251                                    pkt->getBlockAddr(blkSize));
 252     }
 253
 254     if (mshr) {
 255         /// MSHR hit
 256         /// @note writebacks will be checked in getNextMSHR()
 257         /// for any conflicting requests to the same block
 258
 259         //@todo remove hw_pf here
 260
 261         // Coalesce unless it was a software prefetch (see above).
 262         if (pkt) {
 263             assert(!pkt->isWriteback());
 264             // CleanEvicts corresponding to blocks which have
 265             // outstanding requests in MSHRs are simply sunk here
 266             if (pkt->cmd == MemCmd::CleanEvict) {
 267                 pendingDelete.reset(pkt);
 268             } else if (pkt->cmd == MemCmd::WriteClean) {
 269                 // A WriteClean should never coalesce with any
 270                 // outstanding cache maintenance requests.
 271
 272                 // We use forward_time here because there is an
 273                 // uncached memory write, forwarded to WriteBuffer.
 274                 allocateWriteBuffer(pkt, forward_time);
 275             } else {
 276                 DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__,
 277                         pkt->print());
 278
 279                 assert(pkt->req->masterId() < system->maxMasters());
 280                 mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
 281
 282                 // We use forward_time here because it is the same
 283                 // considering new targets. We have multiple
 284                 // requests for the same address here. It
 285                 // specifies the latency to allocate an internal
 286                 // buffer and to schedule an event to the queued
 287                 // port and also takes into account the additional
 288                 // delay of the xbar.
 289                 mshr->allocateTarget(pkt, forward_time, order++,
 290                                      allocOnFill(pkt->cmd));
 291                 if (mshr->getNumTargets() == numTarget) {
 292                     noTargetMSHR = mshr;
 293                     setBlocked(Blocked_NoTargets);
 294                     // need to be careful with this... if this mshr isn't
 295                     // ready yet (i.e. time > curTick()), we don't want to
 296                     // move it ahead of mshrs that are ready
 297                     // mshrQueue.moveToFront(mshr);
 298                 }
 299             }
 300         }
 301     } else {
 302         // no MSHR
 303         assert(pkt->req->masterId() < system->maxMasters());
 304         mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
 305
 306         if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) {
 307             // We use forward_time here because there is an
 308             // writeback or writeclean, forwarded to WriteBuffer.
 309             allocateWriteBuffer(pkt, forward_time);
 310         } else {
 311             if (blk && blk->isValid()) {
 312                 // If we have a write miss to a valid block, we
 313                 // need to mark the block non-readable.  Otherwise
 314                 // if we allow reads while there's an outstanding
 315                 // write miss, the read could return stale data
 316                 // out of the cache block... a more aggressive
 317                 // system could detect the overlap (if any) and
 318                 // forward data out of the MSHRs, but we don't do
 319                 // that yet.  Note that we do need to leave the
 320                 // block valid so that it stays in the cache, in
 321                 // case we get an upgrade response (and hence no
 322                 // new data) when the write miss completes.
 323                 // As long as CPUs do proper store/load forwarding
 324                 // internally, and have a sufficiently weak memory
 325                 // model, this is probably unnecessary, but at some
 326                 // point it must have seemed like we needed it...
 327                 assert((pkt->needsWritable() && !blk->isWritable()) ||
 328                        pkt->req->isCacheMaintenance());
 329                 blk->status &= ~BlkReadable;
 330             }
 331             // Here we are using forward_time, modelling the latency of
 332             // a miss (outbound) just as forwardLatency, neglecting the
 333             // lookupLatency component.
 334             allocateMissBuffer(pkt, forward_time);
 335         }
 336     }
 337 }
 338
 339 void
 340 BaseCache::recvTimingReq(PacketPtr pkt)
 341 {
 342     // anything that is merely forwarded pays for the forward latency and
 343     // the delay provided by the crossbar
 344     Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
 345
 346     // We use lookupLatency here because it is used to specify the latency
 347     // to access.
 348     Cycles lat = lookupLatency;
 349     CacheBlk *blk = nullptr;
 350     bool satisfied = false;
 351     {
 352         PacketList writebacks;
 353         // Note that lat is passed by reference here. The function
 354         // access() calls accessBlock() which can modify lat value.
 355         satisfied = access(pkt, blk, lat, writebacks);
 356
 357         // copy writebacks to write buffer here to ensure they logically
 358         // precede anything happening below
 359         doWritebacks(writebacks, forward_time);
 360     }
 361
 362     // Here we charge the headerDelay that takes into account the latencies
 363     // of the bus, if the packet comes from it.
 364     // The latency charged it is just lat that is the value of lookupLatency
 365     // modified by access() function, or if not just lookupLatency.
 366     // In case of a hit we are neglecting response latency.
 367     // In case of a miss we are neglecting forward latency.
 368     Tick request_time = clockEdge(lat) + pkt->headerDelay;
 369     // Here we reset the timing of the packet.
 370     pkt->headerDelay = pkt->payloadDelay = 0;
 371     // track time of availability of next prefetch, if any
 372     Tick next_pf_time = MaxTick;
 373
 374     if (satisfied) {
 375         // if need to notify the prefetcher we have to do it before
 376         // anything else as later handleTimingReqHit might turn the
 377         // packet in a response
 378         if (prefetcher &&
 379             (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
 380             if (blk)
 381                 blk->status &= ~BlkHWPrefetched;
 382
 383             // Don't notify on SWPrefetch
 384             if (!pkt->cmd.isSWPrefetch()) {
 385                 assert(!pkt->req->isCacheMaintenance());
 386                 next_pf_time = prefetcher->notify(pkt);
 387             }
 388         }
 389
 390         handleTimingReqHit(pkt, blk, request_time);
 391     } else {
 392         handleTimingReqMiss(pkt, blk, forward_time, request_time);
 393
 394         // We should call the prefetcher reguardless if the request is
 395         // satisfied or not, reguardless if the request is in the MSHR
 396         // or not. The request could be a ReadReq hit, but still not
 397         // satisfied (potentially because of a prior write to the same
 398         // cache line. So, even when not satisfied, there is an MSHR
 399         // already allocated for this, we need to let the prefetcher
 400         // know about the request
 401
 402         // Don't notify prefetcher on SWPrefetch, cache maintenance
 403         // operations or for writes that we are coaslescing.
 404         if (prefetcher && pkt &&
 405             !pkt->cmd.isSWPrefetch() &&
 406             !pkt->req->isCacheMaintenance() &&
 407             !(writeAllocator && writeAllocator->coalesce() &&
 408               pkt->isWrite())) {
 409             next_pf_time = prefetcher->notify(pkt);
 410         }
 411     }
 412
 413     if (next_pf_time != MaxTick) {
 414         schedMemSideSendEvent(next_pf_time);
 415     }
 416 }
 417
 418 void
 419 BaseCache::handleUncacheableWriteResp(PacketPtr pkt)
 420 {
 421     Tick completion_time = clockEdge(responseLatency) +
 422         pkt->headerDelay + pkt->payloadDelay;
 423
 424     // Reset the bus additional time as it is now accounted for
 425     pkt->headerDelay = pkt->payloadDelay = 0;
 426
 427     cpuSidePort.schedTimingResp(pkt, completion_time, true);
 428 }
 429
 430 void
 431 BaseCache::recvTimingResp(PacketPtr pkt)
 432 {
 433     assert(pkt->isResponse());
 434
 435     // all header delay should be paid for by the crossbar, unless
 436     // this is a prefetch response from above
 437     panic_if(pkt->headerDelay != 0 && pkt->cmd != MemCmd::HardPFResp,
 438              "%s saw a non-zero packet delay\n", name());
 439
 440     const bool is_error = pkt->isError();
 441
 442     if (is_error) {
 443         DPRINTF(Cache, "%s: Cache received %s with error\n", __func__,
 444                 pkt->print());
 445     }
 446
 447     DPRINTF(Cache, "%s: Handling response %s\n", __func__,
 448             pkt->print());
 449
 450     // if this is a write, we should be looking at an uncacheable
 451     // write
 452     if (pkt->isWrite()) {
 453         assert(pkt->req->isUncacheable());
 454         handleUncacheableWriteResp(pkt);
 455         return;
 456     }
 457
 458     // we have dealt with any (uncacheable) writes above, from here on
 459     // we know we are dealing with an MSHR due to a miss or a prefetch
 460     MSHR *mshr = dynamic_cast<MSHR*>(pkt->popSenderState());
 461     assert(mshr);
 462
 463     if (mshr == noTargetMSHR) {
 464         // we always clear at least one target
 465         clearBlocked(Blocked_NoTargets);
 466         noTargetMSHR = nullptr;
 467     }
 468
 469     // Initial target is used just for stats
 470     MSHR::Target *initial_tgt = mshr->getTarget();
 471     int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
 472     Tick miss_latency = curTick() - initial_tgt->recvTime;
 473
 474     if (pkt->req->isUncacheable()) {
 475         assert(pkt->req->masterId() < system->maxMasters());
 476         mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] +=
 477             miss_latency;
 478     } else {
 479         assert(pkt->req->masterId() < system->maxMasters());
 480         mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] +=
 481             miss_latency;
 482     }
 483
 484     PacketList writebacks;
 485
 486     bool is_fill = !mshr->isForward &&
 487         (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp ||
 488          mshr->wasWholeLineWrite);
 489
 490     // make sure that if the mshr was due to a whole line write then
 491     // the response is an invalidation
 492     assert(!mshr->wasWholeLineWrite || pkt->isInvalidate());
 493
 494     CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
 495
 496     if (is_fill && !is_error) {
 497         DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n",
 498                 pkt->getAddr());
 499
 500         const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ?
 501             writeAllocator->allocate() : mshr->allocOnFill();
 502         blk = handleFill(pkt, blk, writebacks, allocate);
 503         assert(blk != nullptr);
 504     }
 505
 506     if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
 507         // The block was marked not readable while there was a pending
 508         // cache maintenance operation, restore its flag.
 509         blk->status |= BlkReadable;
 510
 511         // This was a cache clean operation (without invalidate)
 512         // and we have a copy of the block already. Since there
 513         // is no invalidation, we can promote targets that don't
 514         // require a writable copy
 515         mshr->promoteReadable();
 516     }
 517
 518     if (blk && blk->isWritable() && !pkt->req->isCacheInvalidate()) {
 519         // If at this point the referenced block is writable and the
 520         // response is not a cache invalidate, we promote targets that
 521         // were deferred as we couldn't guarrantee a writable copy
 522         mshr->promoteWritable();
 523     }
 524
 525     serviceMSHRTargets(mshr, pkt, blk, writebacks);
 526
 527     if (mshr->promoteDeferredTargets()) {
 528         // avoid later read getting stale data while write miss is
 529         // outstanding.. see comment in timingAccess()
 530         if (blk) {
 531             blk->status &= ~BlkReadable;
 532         }
 533         mshrQueue.markPending(mshr);
 534         schedMemSideSendEvent(clockEdge() + pkt->payloadDelay);
 535     } else {
 536         // while we deallocate an mshr from the queue we still have to
 537         // check the isFull condition before and after as we might
 538         // have been using the reserved entries already
 539         const bool was_full = mshrQueue.isFull();
 540         mshrQueue.deallocate(mshr);
 541         if (was_full && !mshrQueue.isFull()) {
 542             clearBlocked(Blocked_NoMSHRs);
 543         }
 544
 545         // Request the bus for a prefetch if this deallocation freed enough
 546         // MSHRs for a prefetch to take place
 547         if (prefetcher && mshrQueue.canPrefetch()) {
 548             Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(),
 549                                          clockEdge());
 550             if (next_pf_time != MaxTick)
 551                 schedMemSideSendEvent(next_pf_time);
 552         }
 553     }
 554
 555     // if we used temp block, check to see if its valid and then clear it out
 556     if (blk == tempBlock && tempBlock->isValid()) {
 557         evictBlock(blk, writebacks);
 558     }
 559
 560     const Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
 561     // copy writebacks to write buffer
 562     doWritebacks(writebacks, forward_time);
 563
 564     DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print());
 565     delete pkt;
 566 }
 567
 568
 569 Tick
 570 BaseCache::recvAtomic(PacketPtr pkt)
 571 {
 572     // We are in atomic mode so we pay just for lookupLatency here.
 573     Cycles lat = lookupLatency;
 574
 575     // follow the same flow as in recvTimingReq, and check if a cache
 576     // above us is responding
 577     if (pkt->cacheResponding() && !pkt->isClean()) {
 578         assert(!pkt->req->isCacheInvalidate());
 579         DPRINTF(Cache, "Cache above responding to %s: not responding\n",
 580                 pkt->print());
 581
 582         // if a cache is responding, and it had the line in Owned
 583         // rather than Modified state, we need to invalidate any
 584         // copies that are not on the same path to memory
 585         assert(pkt->needsWritable() && !pkt->responderHadWritable());
 586         lat += ticksToCycles(memSidePort.sendAtomic(pkt));
 587
 588         return lat * clockPeriod();
 589     }
 590
 591     // should assert here that there are no outstanding MSHRs or
 592     // writebacks... that would mean that someone used an atomic
 593     // access in timing mode
 594
 595     CacheBlk *blk = nullptr;
 596     PacketList writebacks;
 597     bool satisfied = access(pkt, blk, lat, writebacks);
 598
 599     if (pkt->isClean() && blk && blk->isDirty()) {
 600         // A cache clean opearation is looking for a dirty
 601         // block. If a dirty block is encountered a WriteClean
 602         // will update any copies to the path to the memory
 603         // until the point of reference.
 604         DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
 605                 __func__, pkt->print(), blk->print());
 606         PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
 607         writebacks.push_back(wb_pkt);
 608         pkt->setSatisfied();
 609     }
 610
 611     // handle writebacks resulting from the access here to ensure they
 612     // logically precede anything happening below
 613     doWritebacksAtomic(writebacks);
 614     assert(writebacks.empty());
 615
 616     if (!satisfied) {
 617         lat += handleAtomicReqMiss(pkt, blk, writebacks);
 618     }
 619
 620     // Note that we don't invoke the prefetcher at all in atomic mode.
 621     // It's not clear how to do it properly, particularly for
 622     // prefetchers that aggressively generate prefetch candidates and
 623     // rely on bandwidth contention to throttle them; these will tend
 624     // to pollute the cache in atomic mode since there is no bandwidth
 625     // contention.  If we ever do want to enable prefetching in atomic
 626     // mode, though, this is the place to do it... see timingAccess()
 627     // for an example (though we'd want to issue the prefetch(es)
 628     // immediately rather than calling requestMemSideBus() as we do
 629     // there).
 630
 631     // do any writebacks resulting from the response handling
 632     doWritebacksAtomic(writebacks);
 633
 634     // if we used temp block, check to see if its valid and if so
 635     // clear it out, but only do so after the call to recvAtomic is
 636     // finished so that any downstream observers (such as a snoop
 637     // filter), first see the fill, and only then see the eviction
 638     if (blk == tempBlock && tempBlock->isValid()) {
 639         // the atomic CPU calls recvAtomic for fetch and load/store
 640         // sequentuially, and we may already have a tempBlock
 641         // writeback from the fetch that we have not yet sent
 642         if (tempBlockWriteback) {
 643             // if that is the case, write the prevoius one back, and
 644             // do not schedule any new event
 645             writebackTempBlockAtomic();
 646         } else {
 647             // the writeback/clean eviction happens after the call to
 648             // recvAtomic has finished (but before any successive
 649             // calls), so that the response handling from the fill is
 650             // allowed to happen first
 651             schedule(writebackTempBlockAtomicEvent, curTick());
 652         }
 653
 654         tempBlockWriteback = evictBlock(blk);
 655     }
 656
 657     if (pkt->needsResponse()) {
 658         pkt->makeAtomicResponse();
 659     }
 660
 661     return lat * clockPeriod();
 662 }
 663
 664 void
 665 BaseCache::functionalAccess(PacketPtr pkt, bool from_cpu_side)
 666 {
 667     Addr blk_addr = pkt->getBlockAddr(blkSize);
 668     bool is_secure = pkt->isSecure();
 669     CacheBlk *blk = tags->findBlock(pkt->getAddr(), is_secure);
 670     MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
 671
 672     pkt->pushLabel(name());
 673
 674     CacheBlkPrintWrapper cbpw(blk);
 675
 676     // Note that just because an L2/L3 has valid data doesn't mean an
 677     // L1 doesn't have a more up-to-date modified copy that still
 678     // needs to be found.  As a result we always update the request if
 679     // we have it, but only declare it satisfied if we are the owner.
 680
 681     // see if we have data at all (owned or otherwise)
 682     bool have_data = blk && blk->isValid()
 683         && pkt->trySatisfyFunctional(&cbpw, blk_addr, is_secure, blkSize,
 684                                      blk->data);
 685
 686     // data we have is dirty if marked as such or if we have an
 687     // in-service MSHR that is pending a modified line
 688     bool have_dirty =
 689         have_data && (blk->isDirty() ||
 690                       (mshr && mshr->inService && mshr->isPendingModified()));
 691
 692     bool done = have_dirty ||
 693         cpuSidePort.trySatisfyFunctional(pkt) ||
 694         mshrQueue.trySatisfyFunctional(pkt, blk_addr) ||
 695         writeBuffer.trySatisfyFunctional(pkt, blk_addr) ||
 696         memSidePort.trySatisfyFunctional(pkt);
 697
 698     DPRINTF(CacheVerbose, "%s: %s %s%s%s\n", __func__,  pkt->print(),
 699             (blk && blk->isValid()) ? "valid " : "",
 700             have_data ? "data " : "", done ? "done " : "");
 701
 702     // We're leaving the cache, so pop cache->name() label
 703     pkt->popLabel();
 704
 705     if (done) {
 706         pkt->makeResponse();
 707     } else {
 708         // if it came as a request from the CPU side then make sure it
 709         // continues towards the memory side
 710         if (from_cpu_side) {
 711             memSidePort.sendFunctional(pkt);
 712         } else if (cpuSidePort.isSnooping()) {
 713             // if it came from the memory side, it must be a snoop request
 714             // and we should only forward it if we are forwarding snoops
 715             cpuSidePort.sendFunctionalSnoop(pkt);
 716         }
 717     }
 718 }
 719
 720
 721 void
 722 BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt)
 723 {
 724     assert(pkt->isRequest());
 725
 726     uint64_t overwrite_val;
 727     bool overwrite_mem;
 728     uint64_t condition_val64;
 729     uint32_t condition_val32;
 730
 731     int offset = pkt->getOffset(blkSize);
 732     uint8_t *blk_data = blk->data + offset;
 733
 734     assert(sizeof(uint64_t) >= pkt->getSize());
 735
 736     overwrite_mem = true;
 737     // keep a copy of our possible write value, and copy what is at the
 738     // memory address into the packet
 739     pkt->writeData((uint8_t *)&overwrite_val);
 740     pkt->setData(blk_data);
 741
 742     if (pkt->req->isCondSwap()) {
 743         if (pkt->getSize() == sizeof(uint64_t)) {
 744             condition_val64 = pkt->req->getExtraData();
 745             overwrite_mem = !std::memcmp(&condition_val64, blk_data,
 746                                          sizeof(uint64_t));
 747         } else if (pkt->getSize() == sizeof(uint32_t)) {
 748             condition_val32 = (uint32_t)pkt->req->getExtraData();
 749             overwrite_mem = !std::memcmp(&condition_val32, blk_data,
 750                                          sizeof(uint32_t));
 751         } else
 752             panic("Invalid size for conditional read/write\n");
 753     }
 754
 755     if (overwrite_mem) {
 756         std::memcpy(blk_data, &overwrite_val, pkt->getSize());
 757         blk->status |= BlkDirty;
 758     }
 759 }
 760
 761 QueueEntry*
 762 BaseCache::getNextQueueEntry()
 763 {
 764     // Check both MSHR queue and write buffer for potential requests,
 765     // note that null does not mean there is no request, it could
 766     // simply be that it is not ready
 767     MSHR *miss_mshr  = mshrQueue.getNext();
 768     WriteQueueEntry *wq_entry = writeBuffer.getNext();
 769
 770     // If we got a write buffer request ready, first priority is a
 771     // full write buffer, otherwise we favour the miss requests
 772     if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) {
 773         // need to search MSHR queue for conflicting earlier miss.
 774         MSHR *conflict_mshr =
 775             mshrQueue.findPending(wq_entry->blkAddr,
 776                                   wq_entry->isSecure);
 777
 778         if (conflict_mshr && conflict_mshr->order < wq_entry->order) {
 779             // Service misses in order until conflict is cleared.
 780             return conflict_mshr;
 781
 782             // @todo Note that we ignore the ready time of the conflict here
 783         }
 784
 785         // No conflicts; issue write
 786         return wq_entry;
 787     } else if (miss_mshr) {
 788         // need to check for conflicting earlier writeback
 789         WriteQueueEntry *conflict_mshr =
 790             writeBuffer.findPending(miss_mshr->blkAddr,
 791                                     miss_mshr->isSecure);
 792         if (conflict_mshr) {
 793             // not sure why we don't check order here... it was in the
 794             // original code but commented out.
 795
 796             // The only way this happens is if we are
 797             // doing a write and we didn't have permissions
 798             // then subsequently saw a writeback (owned got evicted)
 799             // We need to make sure to perform the writeback first
 800             // To preserve the dirty data, then we can issue the write
 801
 802             // should we return wq_entry here instead?  I.e. do we
 803             // have to flush writes in order?  I don't think so... not
 804             // for Alpha anyway.  Maybe for x86?
 805             return conflict_mshr;
 806
 807             // @todo Note that we ignore the ready time of the conflict here
 808         }
 809
 810         // No conflicts; issue read
 811         return miss_mshr;
 812     }
 813
 814     // fall through... no pending requests.  Try a prefetch.
 815     assert(!miss_mshr && !wq_entry);
 816     if (prefetcher && mshrQueue.canPrefetch()) {
 817         // If we have a miss queue slot, we can try a prefetch
 818         PacketPtr pkt = prefetcher->getPacket();
 819         if (pkt) {
 820             Addr pf_addr = pkt->getBlockAddr(blkSize);
 821             if (!tags->findBlock(pf_addr, pkt->isSecure()) &&
 822                 !mshrQueue.findMatch(pf_addr, pkt->isSecure()) &&
 823                 !writeBuffer.findMatch(pf_addr, pkt->isSecure())) {
 824                 // Update statistic on number of prefetches issued
 825                 // (hwpf_mshr_misses)
 826                 assert(pkt->req->masterId() < system->maxMasters());
 827                 mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
 828
 829                 // allocate an MSHR and return it, note
 830                 // that we send the packet straight away, so do not
 831                 // schedule the send
 832                 return allocateMissBuffer(pkt, curTick(), false);
 833             } else {
 834                 // free the request and packet
 835                 delete pkt;
 836             }
 837         }
 838     }
 839
 840     return nullptr;
 841 }
 842
 843 void
 844 BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
 845 {
 846     assert(pkt->isRequest());
 847
 848     assert(blk && blk->isValid());
 849     // Occasionally this is not true... if we are a lower-level cache
 850     // satisfying a string of Read and ReadEx requests from
 851     // upper-level caches, a Read will mark the block as shared but we
 852     // can satisfy a following ReadEx anyway since we can rely on the
 853     // Read requester(s) to have buffered the ReadEx snoop and to
 854     // invalidate their blocks after receiving them.
 855     // assert(!pkt->needsWritable() || blk->isWritable());
 856     assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
 857
 858     // Check RMW operations first since both isRead() and
 859     // isWrite() will be true for them
 860     if (pkt->cmd == MemCmd::SwapReq) {
 861         if (pkt->isAtomicOp()) {
 862             // extract data from cache and save it into the data field in
 863             // the packet as a return value from this atomic op
 864             int offset = tags->extractBlkOffset(pkt->getAddr());
 865             uint8_t *blk_data = blk->data + offset;
 866             pkt->setData(blk_data);
 867
 868             // execute AMO operation
 869             (*(pkt->getAtomicOp()))(blk_data);
 870
 871             // set block status to dirty
 872             blk->status |= BlkDirty;
 873         } else {
 874             cmpAndSwap(blk, pkt);
 875         }
 876     } else if (pkt->isWrite()) {
 877         // we have the block in a writable state and can go ahead,
 878         // note that the line may be also be considered writable in
 879         // downstream caches along the path to memory, but always
 880         // Exclusive, and never Modified
 881         assert(blk->isWritable());
 882         // Write or WriteLine at the first cache with block in writable state
 883         if (blk->checkWrite(pkt)) {
 884             pkt->writeDataToBlock(blk->data, blkSize);
 885         }
 886         // Always mark the line as dirty (and thus transition to the
 887         // Modified state) even if we are a failed StoreCond so we
 888         // supply data to any snoops that have appended themselves to
 889         // this cache before knowing the store will fail.
 890         blk->status |= BlkDirty;
 891         DPRINTF(CacheVerbose, "%s for %s (write)\n", __func__, pkt->print());
 892     } else if (pkt->isRead()) {
 893         if (pkt->isLLSC()) {
 894             blk->trackLoadLocked(pkt);
 895         }
 896
 897         // all read responses have a data payload
 898         assert(pkt->hasRespData());
 899         pkt->setDataFromBlock(blk->data, blkSize);
 900     } else if (pkt->isUpgrade()) {
 901         // sanity check
 902         assert(!pkt->hasSharers());
 903
 904         if (blk->isDirty()) {
 905             // we were in the Owned state, and a cache above us that
 906             // has the line in Shared state needs to be made aware
 907             // that the data it already has is in fact dirty
 908             pkt->setCacheResponding();
 909             blk->status &= ~BlkDirty;
 910         }
 911     } else if (pkt->isClean()) {
 912         blk->status &= ~BlkDirty;
 913     } else {
 914         assert(pkt->isInvalidate());
 915         invalidateBlock(blk);
 916         DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__,
 917                 pkt->print());
 918     }
 919 }
 920
 921 /////////////////////////////////////////////////////
 922 //
 923 // Access path: requests coming in from the CPU side
 924 //
 925 /////////////////////////////////////////////////////
 926
/**
 * Look up the block addressed by a request and, where possible, handle
 * the request in this cache.
 *
 * @param pkt request packet
 * @param blk set to the result of the tag lookup; for writebacks and
 *            WriteClean packets that miss it may be set to a newly
 *            allocated block. Can be nullptr on return.
 * @param lat handed to the tags' accessBlock(), which can modify it
 * @param writebacks receives packets generated while allocating a block
 * @return true if the request was satisfied or can be dropped here,
 *         false if it has to be handled further (miss, cache maintenance
 *         operation, write-through, or no block could be allocated)
 */
bool
BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
                  PacketList &writebacks)
{
    // sanity check
    assert(pkt->isRequest());

    chatty_assert(!(isReadOnly && pkt->isWrite()),
                  "Should never see a write in a read-only cache %s\n",
                  name());

    // Here lat is the value passed as parameter to accessBlock() function
    // that can modify its value.
    blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), lat);

    DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
            blk ? "hit " + blk->print() : "miss");

    if (pkt->req->isCacheMaintenance()) {
        // A cache maintenance operation is always forwarded to the
        // memory below even if the block is found in dirty state.

        // We defer any changes to the state of the block until we
        // create and mark as in service the mshr for the downstream
        // packet.
        return false;
    }

    if (pkt->isEviction()) {
        // We check for presence of block in above caches before issuing
        // Writeback or CleanEvict to write buffer. Therefore the only
        // possible case is an eviction packet coming from above that
        // encounters a Writeback generated in this cache and still
        // waiting in the write buffer. Cases of upper level peer caches
        // generating CleanEvict and Writeback or simply CleanEvict and
        // CleanEvict almost simultaneously will be caught by snoops sent out
        // by crossbar.
        WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(),
                                                          pkt->isSecure());
        if (wb_entry) {
            // write buffer entries are expected to hold one writeback
            assert(wb_entry->getNumTargets() == 1);
            PacketPtr wbPkt = wb_entry->getTarget()->pkt;
            assert(wbPkt->isWriteback());

            if (pkt->isCleanEviction()) {
                // The CleanEvict and WritebackClean snoops into other
                // peer caches of the same level while traversing the
                // crossbar. If a copy of the block is found, the
                // packet is deleted in the crossbar. Hence, none of
                // the other upper level caches connected to this
                // cache have the block, so we can clear the
                // BLOCK_CACHED flag in the Writeback if set and
                // discard the CleanEvict by returning true.
                wbPkt->clearBlockCached();
                return true;
            } else {
                assert(pkt->cmd == MemCmd::WritebackDirty);
                // Dirty writeback from above trumps our clean
                // writeback... discard here
                // Note: markInService will remove entry from writeback buffer.
                markInService(wb_entry);
                delete wbPkt;
            }
        }
    }

    // Writeback handling is special case.  We can write the block into
    // the cache without having a writeable copy (or any copy at all).
    if (pkt->isWriteback()) {
        // writebacks always carry a whole cache line
        assert(blkSize == pkt->getSize());

        // we could get a clean writeback while we are having
        // outstanding accesses to a block, do the simple thing for
        // now and drop the clean writeback so that we do not upset
        // any ordering/decisions about ownership already taken
        if (pkt->cmd == MemCmd::WritebackClean &&
            mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) {
            DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, "
                    "dropping\n", pkt->getAddr());
            return true;
        }

        if (!blk) {
            // need to do a replacement
            blk = allocateBlock(pkt, writebacks);
            if (!blk) {
                // no replaceable block available: give up, fwd to next level.
                incMissCount(pkt);
                return false;
            }

            // the freshly allocated block is about to receive the data
            blk->status |= (BlkValid | BlkReadable);
        }
        // only mark the block dirty if we got a writeback command,
        // and leave it as is for a clean writeback
        if (pkt->cmd == MemCmd::WritebackDirty) {
            // TODO: the coherent cache can assert(!blk->isDirty());
            blk->status |= BlkDirty;
        }
        // if the packet does not have sharers, it is passing
        // writable, and we got the writeback in Modified or Exclusive
        // state, if not we are in the Owned or Shared state
        if (!pkt->hasSharers()) {
            blk->status |= BlkWritable;
        }
        // nothing else to do; writeback doesn't expect response
        assert(!pkt->needsResponse());
        pkt->writeDataToBlock(blk->data, blkSize);
        DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
        incHitCount(pkt);
        // populate the time when the block will be ready to access.
        blk->whenReady = clockEdge(fillLatency) + pkt->headerDelay +
            pkt->payloadDelay;
        return true;
    } else if (pkt->cmd == MemCmd::CleanEvict) {
        if (blk) {
            // Found the block in the tags, need to stop CleanEvict from
            // propagating further down the hierarchy. Returning true will
            // treat the CleanEvict like a satisfied write request and delete
            // it.
            return true;
        }
        // We didn't find the block here, propagate the CleanEvict further
        // down the memory hierarchy. Returning false will treat the CleanEvict
        // like a Writeback which could not find a replaceable block so has to
        // go to next level.
        return false;
    } else if (pkt->cmd == MemCmd::WriteClean) {
        // WriteClean handling is a special case. We can allocate a
        // block directly if it doesn't exist and we can update the
        // block immediately. The WriteClean transfers the ownership
        // of the block as well.
        assert(blkSize == pkt->getSize());

        if (!blk) {
            if (pkt->writeThrough()) {
                // if this is a write through packet, we don't try to
                // allocate if the block is not present
                return false;
            } else {
                // a writeback that misses needs to allocate a new block
                blk = allocateBlock(pkt, writebacks);
                if (!blk) {
                    // no replaceable block available: give up, fwd to
                    // next level.
                    incMissCount(pkt);
                    return false;
                }

                blk->status |= (BlkValid | BlkReadable);
            }
        }

        // at this point either this is a writeback or a write-through
        // write clean operation and the block is already in this
        // cache, we need to update the data and the block flags
        assert(blk);
        // TODO: the coherent cache can assert(!blk->isDirty());
        if (!pkt->writeThrough()) {
            blk->status |= BlkDirty;
        }
        // nothing else to do; writeback doesn't expect response
        assert(!pkt->needsResponse());
        pkt->writeDataToBlock(blk->data, blkSize);
        DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());

        incHitCount(pkt);
        // populate the time when the block will be ready to access.
        blk->whenReady = clockEdge(fillLatency) + pkt->headerDelay +
            pkt->payloadDelay;
        // if this a write-through packet it will be sent to cache
        // below
        return !pkt->writeThrough();
    } else if (blk && (pkt->needsWritable() ? blk->isWritable() :
                       blk->isReadable())) {
        // OK to satisfy access: the block is present with the
        // permission (writable or readable) that the packet needs
        incHitCount(pkt);
        satisfyRequest(pkt, blk);
        maintainClusivity(pkt->fromCache(), blk);

        return true;
    }

    // Can't satisfy access normally... either no block (blk == nullptr)
    // or have block but need writable

    incMissCount(pkt);

    if (!blk && pkt->isLLSC() && pkt->isWrite()) {
        // complete miss on store conditional... just give up now; the
        // request's extra data is set to 0 and the packet is reported
        // as handled
        pkt->req->setExtraData(0);
        return true;
    }

    return false;
}
1123
1124 void
1125 BaseCache::maintainClusivity(bool from_cache, CacheBlk *blk)
1126 {
1127     if (from_cache && blk && blk->isValid() && !blk->isDirty() &&
1128         clusivity == Enums::mostly_excl) {
1129         // if we have responded to a cache, and our block is still
1130         // valid, but not dirty, and this cache is mostly exclusive
1131         // with respect to the cache above, drop the block
1132         invalidateBlock(blk);
1133     }
1134 }
1135
1136 CacheBlk*
1137 BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,
1138                       bool allocate)
1139 {
1140     assert(pkt->isResponse());
1141     Addr addr = pkt->getAddr();
1142     bool is_secure = pkt->isSecure();
1143 #if TRACING_ON
1144     CacheBlk::State old_state = blk ? blk->status : 0;
1145 #endif
1146
1147     // When handling a fill, we should have no writes to this line.
1148     assert(addr == pkt->getBlockAddr(blkSize));
1149     assert(!writeBuffer.findMatch(addr, is_secure));
1150
1151     if (!blk) {
1152         // better have read new data...
1153         assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp);
1154
1155         // need to do a replacement if allocating, otherwise we stick
1156         // with the temporary storage
1157         blk = allocate ? allocateBlock(pkt, writebacks) : nullptr;
1158
1159         if (!blk) {
1160             // No replaceable block or a mostly exclusive
1161             // cache... just use temporary storage to complete the
1162             // current request and then get rid of it
1163             assert(!tempBlock->isValid());
1164             blk = tempBlock;
1165             tempBlock->insert(addr, is_secure);
1166             DPRINTF(Cache, "using temp block for %#llx (%s)\n", addr,
1167                     is_secure ? "s" : "ns");
1168         }
1169
1170         // we should never be overwriting a valid block
1171         assert(!blk->isValid());
1172     } else {
1173         // existing block... probably an upgrade
1174         assert(regenerateBlkAddr(blk) == addr);
1175         assert(blk->isSecure() == is_secure);
1176         // either we're getting new data or the block should already be valid
1177         assert(pkt->hasData() || blk->isValid());
1178         // don't clear block status... if block is already dirty we
1179         // don't want to lose that
1180     }
1181
1182     blk->status |= BlkValid | BlkReadable;
1183
1184     // sanity check for whole-line writes, which should always be
1185     // marked as writable as part of the fill, and then later marked
1186     // dirty as part of satisfyRequest
1187     if (pkt->cmd == MemCmd::InvalidateResp) {
1188         assert(!pkt->hasSharers());
1189     }
1190
1191     // here we deal with setting the appropriate state of the line,
1192     // and we start by looking at the hasSharers flag, and ignore the
1193     // cacheResponding flag (normally signalling dirty data) if the
1194     // packet has sharers, thus the line is never allocated as Owned
1195     // (dirty but not writable), and always ends up being either
1196     // Shared, Exclusive or Modified, see Packet::setCacheResponding
1197     // for more details
1198     if (!pkt->hasSharers()) {
1199         // we could get a writable line from memory (rather than a
1200         // cache) even in a read-only cache, note that we set this bit
1201         // even for a read-only cache, possibly revisit this decision
1202         blk->status |= BlkWritable;
1203
1204         // check if we got this via cache-to-cache transfer (i.e., from a
1205         // cache that had the block in Modified or Owned state)
1206         if (pkt->cacheResponding()) {
1207             // we got the block in Modified state, and invalidated the
1208             // owners copy
1209             blk->status |= BlkDirty;
1210
1211             chatty_assert(!isReadOnly, "Should never see dirty snoop response "
1212                           "in read-only cache %s\n", name());
1213         }
1214     }
1215
1216     DPRINTF(Cache, "Block addr %#llx (%s) moving from state %x to %s\n",
1217             addr, is_secure ? "s" : "ns", old_state, blk->print());
1218
1219     // if we got new data, copy it in (checking for a read response
1220     // and a response that has data is the same in the end)
1221     if (pkt->isRead()) {
1222         // sanity checks
1223         assert(pkt->hasData());
1224         assert(pkt->getSize() == blkSize);
1225
1226         pkt->writeDataToBlock(blk->data, blkSize);
1227     }
1228     // We pay for fillLatency here.
1229     blk->whenReady = clockEdge() + fillLatency * clockPeriod() +
1230         pkt->payloadDelay;
1231
1232     return blk;
1233 }
1234
1235 CacheBlk*
1236 BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks)
1237 {
1238     // Get address
1239     const Addr addr = pkt->getAddr();
1240
1241     // Get secure bit
1242     const bool is_secure = pkt->isSecure();
1243
1244     // Find replacement victim
1245     std::vector<CacheBlk*> evict_blks;
1246     CacheBlk *victim = tags->findVictim(addr, is_secure, evict_blks);
1247
1248     // It is valid to return nullptr if there is no victim
1249     if (!victim)
1250         return nullptr;
1251
1252     // Print victim block's information
1253     DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print());
1254
1255     // Check for transient state allocations. If any of the entries listed
1256     // for eviction has a transient state, the allocation fails
1257     for (const auto& blk : evict_blks) {
1258         if (blk->isValid()) {
1259             Addr repl_addr = regenerateBlkAddr(blk);
1260             MSHR *repl_mshr = mshrQueue.findMatch(repl_addr, blk->isSecure());
1261             if (repl_mshr) {
1262                 // must be an outstanding upgrade or clean request
1263                 // on a block we're about to replace...
1264                 assert((!blk->isWritable() && repl_mshr->needsWritable()) ||
1265                        repl_mshr->isCleaning());
1266
1267                 // too hard to replace block with transient state
1268                 // allocation failed, block not inserted
1269                 return nullptr;
1270             }
1271         }
1272     }
1273
1274     // The victim will be replaced by a new entry, so increase the replacement
1275     // counter if a valid block is being replaced
1276     if (victim->isValid()) {
1277         DPRINTF(Cache, "replacement: replacing %#llx (%s) with %#llx "
1278                 "(%s): %s\n", regenerateBlkAddr(victim),
1279                 victim->isSecure() ? "s" : "ns",
1280                 addr, is_secure ? "s" : "ns",
1281                 victim->isDirty() ? "writeback" : "clean");
1282
1283         replacements++;
1284     }
1285
1286     // Evict valid blocks associated to this victim block
1287     for (const auto& blk : evict_blks) {
1288         if (blk->isValid()) {
1289             if (blk->wasPrefetched()) {
1290                 unusedPrefetches++;
1291             }
1292
1293             evictBlock(blk, writebacks);
1294         }
1295     }
1296
1297     // Insert new block at victimized entry
1298     tags->insertBlock(addr, is_secure, pkt->req->masterId(),
1299                       pkt->req->taskId(), victim);
1300
1301     return victim;
1302 }
1303
1304 void
1305 BaseCache::invalidateBlock(CacheBlk *blk)
1306 {
1307     // If handling a block present in the Tags, let it do its invalidation
1308     // process, which will update stats and invalidate the block itself
1309     if (blk != tempBlock) {
1310         tags->invalidate(blk);
1311     } else {
1312         tempBlock->invalidate();
1313     }
1314 }
1315
1316 void
1317 BaseCache::evictBlock(CacheBlk *blk, PacketList &writebacks)
1318 {
1319     PacketPtr pkt = evictBlock(blk);
1320     if (pkt) {
1321         writebacks.push_back(pkt);
1322     }
1323 }
1324
1325 PacketPtr
1326 BaseCache::writebackBlk(CacheBlk *blk)
1327 {
1328     chatty_assert(!isReadOnly || writebackClean,
1329                   "Writeback from read-only cache");
1330     assert(blk && blk->isValid() && (blk->isDirty() || writebackClean));
1331
1332     writebacks[Request::wbMasterId]++;
1333
1334     RequestPtr req = std::make_shared<Request>(
1335         regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
1336
1337     if (blk->isSecure())
1338         req->setFlags(Request::SECURE);
1339
1340     req->taskId(blk->task_id);
1341
1342     PacketPtr pkt =
1343         new Packet(req, blk->isDirty() ?
1344                    MemCmd::WritebackDirty : MemCmd::WritebackClean);
1345
1346     DPRINTF(Cache, "Create Writeback %s writable: %d, dirty: %d\n",
1347             pkt->print(), blk->isWritable(), blk->isDirty());
1348
1349     if (blk->isWritable()) {
1350         // not asserting shared means we pass the block in modified
1351         // state, mark our own block non-writeable
1352         blk->status &= ~BlkWritable;
1353     } else {
1354         // we are in the Owned state, tell the receiver
1355         pkt->setHasSharers();
1356     }
1357
1358     // make sure the block is not marked dirty
1359     blk->status &= ~BlkDirty;
1360
1361     pkt->allocate();
1362     pkt->setDataFromBlock(blk->data, blkSize);
1363
1364     return pkt;
1365 }
1366
1367 PacketPtr
1368 BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id)
1369 {
1370     RequestPtr req = std::make_shared<Request>(
1371         regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
1372
1373     if (blk->isSecure()) {
1374         req->setFlags(Request::SECURE);
1375     }
1376     req->taskId(blk->task_id);
1377
1378     PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id);
1379
1380     if (dest) {
1381         req->setFlags(dest);
1382         pkt->setWriteThrough();
1383     }
1384
1385     DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(),
1386             blk->isWritable(), blk->isDirty());
1387
1388     if (blk->isWritable()) {
1389         // not asserting shared means we pass the block in modified
1390         // state, mark our own block non-writeable
1391         blk->status &= ~BlkWritable;
1392     } else {
1393         // we are in the Owned state, tell the receiver
1394         pkt->setHasSharers();
1395     }
1396
1397     // make sure the block is not marked dirty
1398     blk->status &= ~BlkDirty;
1399
1400     pkt->allocate();
1401     pkt->setDataFromBlock(blk->data, blkSize);
1402
1403     return pkt;
1404 }
1405
1406
1407 void
1408 BaseCache::memWriteback()
1409 {
1410     tags->forEachBlk([this](CacheBlk &blk) { writebackVisitor(blk); });
1411 }
1412
1413 void
1414 BaseCache::memInvalidate()
1415 {
1416     tags->forEachBlk([this](CacheBlk &blk) { invalidateVisitor(blk); });
1417 }
1418
1419 bool
1420 BaseCache::isDirty() const
1421 {
1422     return tags->anyBlk([](CacheBlk &blk) { return blk.isDirty(); });
1423 }
1424
1425 void
1426 BaseCache::writebackVisitor(CacheBlk &blk)
1427 {
1428     if (blk.isDirty()) {
1429         assert(blk.isValid());
1430
1431         RequestPtr request = std::make_shared<Request>(
1432             regenerateBlkAddr(&blk), blkSize, 0, Request::funcMasterId);
1433
1434         request->taskId(blk.task_id);
1435         if (blk.isSecure()) {
1436             request->setFlags(Request::SECURE);
1437         }
1438
1439         Packet packet(request, MemCmd::WriteReq);
1440         packet.dataStatic(blk.data);
1441
1442         memSidePort.sendFunctional(&packet);
1443
1444         blk.status &= ~BlkDirty;
1445     }
1446 }
1447
1448 void
1449 BaseCache::invalidateVisitor(CacheBlk &blk)
1450 {
1451     if (blk.isDirty())
1452         warn_once("Invalidating dirty cache lines. " \
1453                   "Expect things to break.\n");
1454
1455     if (blk.isValid()) {
1456         assert(!blk.isDirty());
1457         invalidateBlock(&blk);
1458     }
1459 }
1460
1461 Tick
1462 BaseCache::nextQueueReadyTime() const
1463 {
1464     Tick nextReady = std::min(mshrQueue.nextReadyTime(),
1465                               writeBuffer.nextReadyTime());
1466
1467     // Don't signal prefetch ready time if no MSHRs available
1468     // Will signal once enoguh MSHRs are deallocated
1469     if (prefetcher && mshrQueue.canPrefetch()) {
1470         nextReady = std::min(nextReady,
1471                              prefetcher->nextPrefetchReadyTime());
1472     }
1473
1474     return nextReady;
1475 }
1476
1477
/**
 * Create a downstream packet for an MSHR and try to send it on the
 * memory-side port.
 *
 * @param mshr MSHR to service, must not be null
 * @return true if the port did not accept the packet and we are now
 *         awaiting a retry; false if the packet was sent, or if the
 *         MSHR was delayed to allow for write coalescing
 */
bool
BaseCache::sendMSHRQueuePacket(MSHR* mshr)
{
    assert(mshr);

    // use request from 1st target
    PacketPtr tgt_pkt = mshr->getTarget()->pkt;

    DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());

    // if the cache is in write coalescing mode or (additionally) in
    // no allocation mode, and we have a write packet with an MSHR
    // that is not a whole-line write (due to incompatible flags etc),
    // then reset the write mode
    if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) {
        if (!mshr->isWholeLineWrite()) {
            // if we are currently write coalescing, hold on the
            // MSHR as many cycles extra as we need to completely
            // write a cache line
            if (writeAllocator->delay(mshr->blkAddr)) {
                // one clock period per target-sized piece of the line
                Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod();
                DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow "
                        "for write coalescing\n", tgt_pkt->print(), delay);
                mshrQueue.delay(mshr, delay);
                return false;
            } else {
                writeAllocator->reset();
            }
        } else {
            writeAllocator->resetDelay(mshr->blkAddr);
        }
    }

    CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);

    // either a prefetch that is not present upstream, or a normal
    // MSHR request, proceed to get the packet to send downstream
    PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(),
                                     mshr->isWholeLineWrite());

    // a null miss packet means the target is forwarded as is
    mshr->isForward = (pkt == nullptr);

    if (mshr->isForward) {
        // not a cache block request, but a response is expected
        // make copy of current packet to forward, keep current
        // copy for response handling
        pkt = new Packet(tgt_pkt, false, true);
        assert(!pkt->isWrite());
    }

    // play it safe and append (rather than set) the sender state,
    // as forwarded packets may already have existing state
    pkt->pushSenderState(mshr);

    if (pkt->isClean() && blk && blk->isDirty()) {
        // A cache clean operation is looking for a dirty block. Mark
        // the packet so that the destination xbar can determine that
        // there will be a follow-up write packet as well.
        pkt->setSatisfied();
    }

    if (!memSidePort.sendTimingReq(pkt)) {
        // we are awaiting a retry, but we
        // delete the packet and will be creating a new packet
        // when we get the opportunity
        delete pkt;

        // note that we have now masked any requestBus and
        // schedSendEvent (we will wait for a retry before
        // doing anything), and this is so even if we do not
        // care about this packet and might override it before
        // it gets retried
        return true;
    } else {
        // As part of the call to sendTimingReq the packet is
        // forwarded to all neighbouring caches (and any caches
        // above them) as a snoop. Thus at this point we know if
        // any of the neighbouring caches are responding, and if
        // so, we know it is dirty, and we can determine if it is
        // being passed as Modified, making our MSHR the ordering
        // point
        bool pending_modified_resp = !pkt->hasSharers() &&
            pkt->cacheResponding();
        markInService(mshr, pending_modified_resp);

        if (pkt->isClean() && blk && blk->isDirty()) {
            // A cache clean operation is looking for a dirty
            // block. If a dirty block is encountered a WriteClean
            // will update any copies to the path to the memory
            // until the point of reference.
            DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
                    __func__, pkt->print(), blk->print());
            // the WriteClean reuses the destination and id of the
            // clean packet that was just sent
            PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
                                             pkt->id);
            PacketList writebacks;
            writebacks.push_back(wb_pkt);
            doWritebacks(writebacks, 0);
        }

        return false;
    }
}
1580
1581 bool
1582 BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry)
1583 {
1584     assert(wq_entry);
1585
1586     // always a single target for write queue entries
1587     PacketPtr tgt_pkt = wq_entry->getTarget()->pkt;
1588
1589     DPRINTF(Cache, "%s: write %s\n", __func__, tgt_pkt->print());
1590
1591     // forward as is, both for evictions and uncacheable writes
1592     if (!memSidePort.sendTimingReq(tgt_pkt)) {
1593         // note that we have now masked any requestBus and
1594         // schedSendEvent (we will wait for a retry before
1595         // doing anything), and this is so even if we do not
1596         // care about this packet and might override it before
1597         // it gets retried
1598         return true;
1599     } else {
1600         markInService(wq_entry);
1601         return false;
1602     }
1603 }
1604
1605 void
1606 BaseCache::serialize(CheckpointOut &cp) const
1607 {
1608     bool dirty(isDirty());
1609
1610     if (dirty) {
1611         warn("*** The cache still contains dirty data. ***\n");
1612         warn("    Make sure to drain the system using the correct flags.\n");
1613         warn("    This checkpoint will not restore correctly " \
1614              "and dirty data in the cache will be lost!\n");
1615     }
1616
1617     // Since we don't checkpoint the data in the cache, any dirty data
1618     // will be lost when restoring from a checkpoint of a system that
1619     // wasn't drained properly. Flag the checkpoint as invalid if the
1620     // cache contains dirty data.
1621     bool bad_checkpoint(dirty);
1622     SERIALIZE_SCALAR(bad_checkpoint);
1623 }
1624
1625 void
1626 BaseCache::unserialize(CheckpointIn &cp)
1627 {
1628     bool bad_checkpoint;
1629     UNSERIALIZE_SCALAR(bad_checkpoint);
1630     if (bad_checkpoint) {
1631         fatal("Restoring from checkpoints with dirty caches is not "
1632               "supported in the classic memory system. Please remove any "
1633               "caches or drain them properly before taking checkpoints.\n");
1634     }
1635 }
1636
1637 void
1638 BaseCache::regStats()
1639 {
1640     MemObject::regStats();
1641
1642     using namespace Stats;
1643
1644     // Hit statistics
1645     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1646         MemCmd cmd(access_idx);
1647         const string &cstr = cmd.toString();
1648
1649         hits[access_idx]
1650             .init(system->maxMasters())
1651             .name(name() + "." + cstr + "_hits")
1652             .desc("number of " + cstr + " hits")
1653             .flags(total | nozero | nonan)
1654             ;
1655         for (int i = 0; i < system->maxMasters(); i++) {
1656             hits[access_idx].subname(i, system->getMasterName(i));
1657         }
1658     }
1659
1660 // These macros make it easier to sum the right subset of commands and
1661 // to change the subset of commands that are considered "demand" vs
1662 // "non-demand"
1663 #define SUM_DEMAND(s) \
1664     (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::WriteLineReq] + \
1665      s[MemCmd::ReadExReq] + s[MemCmd::ReadCleanReq] + s[MemCmd::ReadSharedReq])
1666
1667 // should writebacks be included here?  prior code was inconsistent...
1668 #define SUM_NON_DEMAND(s) \
1669     (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq] + s[MemCmd::SoftPFExReq])
1670
1671     demandHits
1672         .name(name() + ".demand_hits")
1673         .desc("number of demand (read+write) hits")
1674         .flags(total | nozero | nonan)
1675         ;
1676     demandHits = SUM_DEMAND(hits);
1677     for (int i = 0; i < system->maxMasters(); i++) {
1678         demandHits.subname(i, system->getMasterName(i));
1679     }
1680
1681     overallHits
1682         .name(name() + ".overall_hits")
1683         .desc("number of overall hits")
1684         .flags(total | nozero | nonan)
1685         ;
1686     overallHits = demandHits + SUM_NON_DEMAND(hits);
1687     for (int i = 0; i < system->maxMasters(); i++) {
1688         overallHits.subname(i, system->getMasterName(i));
1689     }
1690
1691     // Miss statistics
1692     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1693         MemCmd cmd(access_idx);
1694         const string &cstr = cmd.toString();
1695
1696         misses[access_idx]
1697             .init(system->maxMasters())
1698             .name(name() + "." + cstr + "_misses")
1699             .desc("number of " + cstr + " misses")
1700             .flags(total | nozero | nonan)
1701             ;
1702         for (int i = 0; i < system->maxMasters(); i++) {
1703             misses[access_idx].subname(i, system->getMasterName(i));
1704         }
1705     }
1706
1707     demandMisses
1708         .name(name() + ".demand_misses")
1709         .desc("number of demand (read+write) misses")
1710         .flags(total | nozero | nonan)
1711         ;
1712     demandMisses = SUM_DEMAND(misses);
1713     for (int i = 0; i < system->maxMasters(); i++) {
1714         demandMisses.subname(i, system->getMasterName(i));
1715     }
1716
1717     overallMisses
1718         .name(name() + ".overall_misses")
1719         .desc("number of overall misses")
1720         .flags(total | nozero | nonan)
1721         ;
1722     overallMisses = demandMisses + SUM_NON_DEMAND(misses);
1723     for (int i = 0; i < system->maxMasters(); i++) {
1724         overallMisses.subname(i, system->getMasterName(i));
1725     }
1726
1727     // Miss latency statistics
1728     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1729         MemCmd cmd(access_idx);
1730         const string &cstr = cmd.toString();
1731
1732         missLatency[access_idx]
1733             .init(system->maxMasters())
1734             .name(name() + "." + cstr + "_miss_latency")
1735             .desc("number of " + cstr + " miss cycles")
1736             .flags(total | nozero | nonan)
1737             ;
1738         for (int i = 0; i < system->maxMasters(); i++) {
1739             missLatency[access_idx].subname(i, system->getMasterName(i));
1740         }
1741     }
1742
1743     demandMissLatency
1744         .name(name() + ".demand_miss_latency")
1745         .desc("number of demand (read+write) miss cycles")
1746         .flags(total | nozero | nonan)
1747         ;
1748     demandMissLatency = SUM_DEMAND(missLatency);
1749     for (int i = 0; i < system->maxMasters(); i++) {
1750         demandMissLatency.subname(i, system->getMasterName(i));
1751     }
1752
1753     overallMissLatency
1754         .name(name() + ".overall_miss_latency")
1755         .desc("number of overall miss cycles")
1756         .flags(total | nozero | nonan)
1757         ;
1758     overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
1759     for (int i = 0; i < system->maxMasters(); i++) {
1760         overallMissLatency.subname(i, system->getMasterName(i));
1761     }
1762
1763     // access formulas
1764     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1765         MemCmd cmd(access_idx);
1766         const string &cstr = cmd.toString();
1767
1768         accesses[access_idx]
1769             .name(name() + "." + cstr + "_accesses")
1770             .desc("number of " + cstr + " accesses(hits+misses)")
1771             .flags(total | nozero | nonan)
1772             ;
1773         accesses[access_idx] = hits[access_idx] + misses[access_idx];
1774
1775         for (int i = 0; i < system->maxMasters(); i++) {
1776             accesses[access_idx].subname(i, system->getMasterName(i));
1777         }
1778     }
1779
1780     demandAccesses
1781         .name(name() + ".demand_accesses")
1782         .desc("number of demand (read+write) accesses")
1783         .flags(total | nozero | nonan)
1784         ;
1785     demandAccesses = demandHits + demandMisses;
1786     for (int i = 0; i < system->maxMasters(); i++) {
1787         demandAccesses.subname(i, system->getMasterName(i));
1788     }
1789
1790     overallAccesses
1791         .name(name() + ".overall_accesses")
1792         .desc("number of overall (read+write) accesses")
1793         .flags(total | nozero | nonan)
1794         ;
1795     overallAccesses = overallHits + overallMisses;
1796     for (int i = 0; i < system->maxMasters(); i++) {
1797         overallAccesses.subname(i, system->getMasterName(i));
1798     }
1799
1800     // miss rate formulas
1801     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1802         MemCmd cmd(access_idx);
1803         const string &cstr = cmd.toString();
1804
1805         missRate[access_idx]
1806             .name(name() + "." + cstr + "_miss_rate")
1807             .desc("miss rate for " + cstr + " accesses")
1808             .flags(total | nozero | nonan)
1809             ;
1810         missRate[access_idx] = misses[access_idx] / accesses[access_idx];
1811
1812         for (int i = 0; i < system->maxMasters(); i++) {
1813             missRate[access_idx].subname(i, system->getMasterName(i));
1814         }
1815     }
1816
1817     demandMissRate
1818         .name(name() + ".demand_miss_rate")
1819         .desc("miss rate for demand accesses")
1820         .flags(total | nozero | nonan)
1821         ;
1822     demandMissRate = demandMisses / demandAccesses;
1823     for (int i = 0; i < system->maxMasters(); i++) {
1824         demandMissRate.subname(i, system->getMasterName(i));
1825     }
1826
1827     overallMissRate
1828         .name(name() + ".overall_miss_rate")
1829         .desc("miss rate for overall accesses")
1830         .flags(total | nozero | nonan)
1831         ;
1832     overallMissRate = overallMisses / overallAccesses;
1833     for (int i = 0; i < system->maxMasters(); i++) {
1834         overallMissRate.subname(i, system->getMasterName(i));
1835     }
1836
1837     // miss latency formulas
1838     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1839         MemCmd cmd(access_idx);
1840         const string &cstr = cmd.toString();
1841
1842         avgMissLatency[access_idx]
1843             .name(name() + "." + cstr + "_avg_miss_latency")
1844             .desc("average " + cstr + " miss latency")
1845             .flags(total | nozero | nonan)
1846             ;
1847         avgMissLatency[access_idx] =
1848             missLatency[access_idx] / misses[access_idx];
1849
1850         for (int i = 0; i < system->maxMasters(); i++) {
1851             avgMissLatency[access_idx].subname(i, system->getMasterName(i));
1852         }
1853     }
1854
1855     demandAvgMissLatency
1856         .name(name() + ".demand_avg_miss_latency")
1857         .desc("average overall miss latency")
1858         .flags(total | nozero | nonan)
1859         ;
1860     demandAvgMissLatency = demandMissLatency / demandMisses;
1861     for (int i = 0; i < system->maxMasters(); i++) {
1862         demandAvgMissLatency.subname(i, system->getMasterName(i));
1863     }
1864
1865     overallAvgMissLatency
1866         .name(name() + ".overall_avg_miss_latency")
1867         .desc("average overall miss latency")
1868         .flags(total | nozero | nonan)
1869         ;
1870     overallAvgMissLatency = overallMissLatency / overallMisses;
1871     for (int i = 0; i < system->maxMasters(); i++) {
1872         overallAvgMissLatency.subname(i, system->getMasterName(i));
1873     }
1874
1875     blocked_cycles.init(NUM_BLOCKED_CAUSES);
1876     blocked_cycles
1877         .name(name() + ".blocked_cycles")
1878         .desc("number of cycles access was blocked")
1879         .subname(Blocked_NoMSHRs, "no_mshrs")
1880         .subname(Blocked_NoTargets, "no_targets")
1881         ;
1882
1883
1884     blocked_causes.init(NUM_BLOCKED_CAUSES);
1885     blocked_causes
1886         .name(name() + ".blocked")
1887         .desc("number of cycles access was blocked")
1888         .subname(Blocked_NoMSHRs, "no_mshrs")
1889         .subname(Blocked_NoTargets, "no_targets")
1890         ;
1891
1892     avg_blocked
1893         .name(name() + ".avg_blocked_cycles")
1894         .desc("average number of cycles each access was blocked")
1895         .subname(Blocked_NoMSHRs, "no_mshrs")
1896         .subname(Blocked_NoTargets, "no_targets")
1897         ;
1898
1899     avg_blocked = blocked_cycles / blocked_causes;
1900
1901     unusedPrefetches
1902         .name(name() + ".unused_prefetches")
1903         .desc("number of HardPF blocks evicted w/o reference")
1904         .flags(nozero)
1905         ;
1906
1907     writebacks
1908         .init(system->maxMasters())
1909         .name(name() + ".writebacks")
1910         .desc("number of writebacks")
1911         .flags(total | nozero | nonan)
1912         ;
1913     for (int i = 0; i < system->maxMasters(); i++) {
1914         writebacks.subname(i, system->getMasterName(i));
1915     }
1916
1917     // MSHR statistics
1918     // MSHR hit statistics
1919     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1920         MemCmd cmd(access_idx);
1921         const string &cstr = cmd.toString();
1922
1923         mshr_hits[access_idx]
1924             .init(system->maxMasters())
1925             .name(name() + "." + cstr + "_mshr_hits")
1926             .desc("number of " + cstr + " MSHR hits")
1927             .flags(total | nozero | nonan)
1928             ;
1929         for (int i = 0; i < system->maxMasters(); i++) {
1930             mshr_hits[access_idx].subname(i, system->getMasterName(i));
1931         }
1932     }
1933
1934     demandMshrHits
1935         .name(name() + ".demand_mshr_hits")
1936         .desc("number of demand (read+write) MSHR hits")
1937         .flags(total | nozero | nonan)
1938         ;
1939     demandMshrHits = SUM_DEMAND(mshr_hits);
1940     for (int i = 0; i < system->maxMasters(); i++) {
1941         demandMshrHits.subname(i, system->getMasterName(i));
1942     }
1943
1944     overallMshrHits
1945         .name(name() + ".overall_mshr_hits")
1946         .desc("number of overall MSHR hits")
1947         .flags(total | nozero | nonan)
1948         ;
1949     overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
1950     for (int i = 0; i < system->maxMasters(); i++) {
1951         overallMshrHits.subname(i, system->getMasterName(i));
1952     }
1953
1954     // MSHR miss statistics
1955     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1956         MemCmd cmd(access_idx);
1957         const string &cstr = cmd.toString();
1958
1959         mshr_misses[access_idx]
1960             .init(system->maxMasters())
1961             .name(name() + "." + cstr + "_mshr_misses")
1962             .desc("number of " + cstr + " MSHR misses")
1963             .flags(total | nozero | nonan)
1964             ;
1965         for (int i = 0; i < system->maxMasters(); i++) {
1966             mshr_misses[access_idx].subname(i, system->getMasterName(i));
1967         }
1968     }
1969
1970     demandMshrMisses
1971         .name(name() + ".demand_mshr_misses")
1972         .desc("number of demand (read+write) MSHR misses")
1973         .flags(total | nozero | nonan)
1974         ;
1975     demandMshrMisses = SUM_DEMAND(mshr_misses);
1976     for (int i = 0; i < system->maxMasters(); i++) {
1977         demandMshrMisses.subname(i, system->getMasterName(i));
1978     }
1979
1980     overallMshrMisses
1981         .name(name() + ".overall_mshr_misses")
1982         .desc("number of overall MSHR misses")
1983         .flags(total | nozero | nonan)
1984         ;
1985     overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
1986     for (int i = 0; i < system->maxMasters(); i++) {
1987         overallMshrMisses.subname(i, system->getMasterName(i));
1988     }
1989
1990     // MSHR miss latency statistics
1991     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1992         MemCmd cmd(access_idx);
1993         const string &cstr = cmd.toString();
1994
1995         mshr_miss_latency[access_idx]
1996             .init(system->maxMasters())
1997             .name(name() + "." + cstr + "_mshr_miss_latency")
1998             .desc("number of " + cstr + " MSHR miss cycles")
1999             .flags(total | nozero | nonan)
2000             ;
2001         for (int i = 0; i < system->maxMasters(); i++) {
2002             mshr_miss_latency[access_idx].subname(i, system->getMasterName(i));
2003         }
2004     }
2005
2006     demandMshrMissLatency
2007         .name(name() + ".demand_mshr_miss_latency")
2008         .desc("number of demand (read+write) MSHR miss cycles")
2009         .flags(total | nozero | nonan)
2010         ;
2011     demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
2012     for (int i = 0; i < system->maxMasters(); i++) {
2013         demandMshrMissLatency.subname(i, system->getMasterName(i));
2014     }
2015
2016     overallMshrMissLatency
2017         .name(name() + ".overall_mshr_miss_latency")
2018         .desc("number of overall MSHR miss cycles")
2019         .flags(total | nozero | nonan)
2020         ;
2021     overallMshrMissLatency =
2022         demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
2023     for (int i = 0; i < system->maxMasters(); i++) {
2024         overallMshrMissLatency.subname(i, system->getMasterName(i));
2025     }
2026
2027     // MSHR uncacheable statistics
2028     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2029         MemCmd cmd(access_idx);
2030         const string &cstr = cmd.toString();
2031
2032         mshr_uncacheable[access_idx]
2033             .init(system->maxMasters())
2034             .name(name() + "." + cstr + "_mshr_uncacheable")
2035             .desc("number of " + cstr + " MSHR uncacheable")
2036             .flags(total | nozero | nonan)
2037             ;
2038         for (int i = 0; i < system->maxMasters(); i++) {
2039             mshr_uncacheable[access_idx].subname(i, system->getMasterName(i));
2040         }
2041     }
2042
2043     overallMshrUncacheable
2044         .name(name() + ".overall_mshr_uncacheable_misses")
2045         .desc("number of overall MSHR uncacheable misses")
2046         .flags(total | nozero | nonan)
2047         ;
2048     overallMshrUncacheable =
2049         SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
2050     for (int i = 0; i < system->maxMasters(); i++) {
2051         overallMshrUncacheable.subname(i, system->getMasterName(i));
2052     }
2053
2054     // MSHR miss latency statistics
2055     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2056         MemCmd cmd(access_idx);
2057         const string &cstr = cmd.toString();
2058
2059         mshr_uncacheable_lat[access_idx]
2060             .init(system->maxMasters())
2061             .name(name() + "." + cstr + "_mshr_uncacheable_latency")
2062             .desc("number of " + cstr + " MSHR uncacheable cycles")
2063             .flags(total | nozero | nonan)
2064             ;
2065         for (int i = 0; i < system->maxMasters(); i++) {
2066             mshr_uncacheable_lat[access_idx].subname(
2067                 i, system->getMasterName(i));
2068         }
2069     }
2070
2071     overallMshrUncacheableLatency
2072         .name(name() + ".overall_mshr_uncacheable_latency")
2073         .desc("number of overall MSHR uncacheable cycles")
2074         .flags(total | nozero | nonan)
2075         ;
2076     overallMshrUncacheableLatency =
2077         SUM_DEMAND(mshr_uncacheable_lat) +
2078         SUM_NON_DEMAND(mshr_uncacheable_lat);
2079     for (int i = 0; i < system->maxMasters(); i++) {
2080         overallMshrUncacheableLatency.subname(i, system->getMasterName(i));
2081     }
2082
2083 #if 0
2084     // MSHR access formulas
2085     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2086         MemCmd cmd(access_idx);
2087         const string &cstr = cmd.toString();
2088
2089         mshrAccesses[access_idx]
2090             .name(name() + "." + cstr + "_mshr_accesses")
2091             .desc("number of " + cstr + " mshr accesses(hits+misses)")
2092             .flags(total | nozero | nonan)
2093             ;
2094         mshrAccesses[access_idx] =
2095             mshr_hits[access_idx] + mshr_misses[access_idx]
2096             + mshr_uncacheable[access_idx];
2097     }
2098
2099     demandMshrAccesses
2100         .name(name() + ".demand_mshr_accesses")
2101         .desc("number of demand (read+write) mshr accesses")
2102         .flags(total | nozero | nonan)
2103         ;
2104     demandMshrAccesses = demandMshrHits + demandMshrMisses;
2105
2106     overallMshrAccesses
2107         .name(name() + ".overall_mshr_accesses")
2108         .desc("number of overall (read+write) mshr accesses")
2109         .flags(total | nozero | nonan)
2110         ;
2111     overallMshrAccesses = overallMshrHits + overallMshrMisses
2112         + overallMshrUncacheable;
2113 #endif
2114
2115     // MSHR miss rate formulas
2116     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2117         MemCmd cmd(access_idx);
2118         const string &cstr = cmd.toString();
2119
2120         mshrMissRate[access_idx]
2121             .name(name() + "." + cstr + "_mshr_miss_rate")
2122             .desc("mshr miss rate for " + cstr + " accesses")
2123             .flags(total | nozero | nonan)
2124             ;
2125         mshrMissRate[access_idx] =
2126             mshr_misses[access_idx] / accesses[access_idx];
2127
2128         for (int i = 0; i < system->maxMasters(); i++) {
2129             mshrMissRate[access_idx].subname(i, system->getMasterName(i));
2130         }
2131     }
2132
2133     demandMshrMissRate
2134         .name(name() + ".demand_mshr_miss_rate")
2135         .desc("mshr miss rate for demand accesses")
2136         .flags(total | nozero | nonan)
2137         ;
2138     demandMshrMissRate = demandMshrMisses / demandAccesses;
2139     for (int i = 0; i < system->maxMasters(); i++) {
2140         demandMshrMissRate.subname(i, system->getMasterName(i));
2141     }
2142
2143     overallMshrMissRate
2144         .name(name() + ".overall_mshr_miss_rate")
2145         .desc("mshr miss rate for overall accesses")
2146         .flags(total | nozero | nonan)
2147         ;
2148     overallMshrMissRate = overallMshrMisses / overallAccesses;
2149     for (int i = 0; i < system->maxMasters(); i++) {
2150         overallMshrMissRate.subname(i, system->getMasterName(i));
2151     }
2152
2153     // mshrMiss latency formulas
2154     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2155         MemCmd cmd(access_idx);
2156         const string &cstr = cmd.toString();
2157
2158         avgMshrMissLatency[access_idx]
2159             .name(name() + "." + cstr + "_avg_mshr_miss_latency")
2160             .desc("average " + cstr + " mshr miss latency")
2161             .flags(total | nozero | nonan)
2162             ;
2163         avgMshrMissLatency[access_idx] =
2164             mshr_miss_latency[access_idx] / mshr_misses[access_idx];
2165
2166         for (int i = 0; i < system->maxMasters(); i++) {
2167             avgMshrMissLatency[access_idx].subname(
2168                 i, system->getMasterName(i));
2169         }
2170     }
2171
2172     demandAvgMshrMissLatency
2173         .name(name() + ".demand_avg_mshr_miss_latency")
2174         .desc("average overall mshr miss latency")
2175         .flags(total | nozero | nonan)
2176         ;
2177     demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
2178     for (int i = 0; i < system->maxMasters(); i++) {
2179         demandAvgMshrMissLatency.subname(i, system->getMasterName(i));
2180     }
2181
2182     overallAvgMshrMissLatency
2183         .name(name() + ".overall_avg_mshr_miss_latency")
2184         .desc("average overall mshr miss latency")
2185         .flags(total | nozero | nonan)
2186         ;
2187     overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
2188     for (int i = 0; i < system->maxMasters(); i++) {
2189         overallAvgMshrMissLatency.subname(i, system->getMasterName(i));
2190     }
2191
2192     // mshrUncacheable latency formulas
2193     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2194         MemCmd cmd(access_idx);
2195         const string &cstr = cmd.toString();
2196
2197         avgMshrUncacheableLatency[access_idx]
2198             .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency")
2199             .desc("average " + cstr + " mshr uncacheable latency")
2200             .flags(total | nozero | nonan)
2201             ;
2202         avgMshrUncacheableLatency[access_idx] =
2203             mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
2204
2205         for (int i = 0; i < system->maxMasters(); i++) {
2206             avgMshrUncacheableLatency[access_idx].subname(
2207                 i, system->getMasterName(i));
2208         }
2209     }
2210
2211     overallAvgMshrUncacheableLatency
2212         .name(name() + ".overall_avg_mshr_uncacheable_latency")
2213         .desc("average overall mshr uncacheable latency")
2214         .flags(total | nozero | nonan)
2215         ;
2216     overallAvgMshrUncacheableLatency =
2217         overallMshrUncacheableLatency / overallMshrUncacheable;
2218     for (int i = 0; i < system->maxMasters(); i++) {
2219         overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i));
2220     }
2221
2222     replacements
2223         .name(name() + ".replacements")
2224         .desc("number of replacements")
2225         ;
2226 }
2227
2228 ///////////////
2229 //
2230 // CpuSidePort
2231 //
2232 ///////////////
2233 bool
2234 BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
2235 {
2236     // Snoops shouldn't happen when bypassing caches
2237     assert(!cache->system->bypassCaches());
2238
2239     assert(pkt->isResponse());
2240
2241     // Express snoop responses from master to slave, e.g., from L1 to L2
2242     cache->recvTimingSnoopResp(pkt);
2243     return true;
2244 }
2245
2246
2247 bool
2248 BaseCache::CpuSidePort::tryTiming(PacketPtr pkt)
2249 {
2250     if (cache->system->bypassCaches() || pkt->isExpressSnoop()) {
2251         // always let express snoop packets through even if blocked
2252         return true;
2253     } else if (blocked || mustSendRetry) {
2254         // either already committed to send a retry, or blocked
2255         mustSendRetry = true;
2256         return false;
2257     }
2258     mustSendRetry = false;
2259     return true;
2260 }
2261
2262 bool
2263 BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
2264 {
2265     assert(pkt->isRequest());
2266
2267     if (cache->system->bypassCaches()) {
2268         // Just forward the packet if caches are disabled.
2269         // @todo This should really enqueue the packet rather
2270         bool M5_VAR_USED success = cache->memSidePort.sendTimingReq(pkt);
2271         assert(success);
2272         return true;
2273     } else if (tryTiming(pkt)) {
2274         cache->recvTimingReq(pkt);
2275         return true;
2276     }
2277     return false;
2278 }
2279
2280 Tick
2281 BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt)
2282 {
2283     if (cache->system->bypassCaches()) {
2284         // Forward the request if the system is in cache bypass mode.
2285         return cache->memSidePort.sendAtomic(pkt);
2286     } else {
2287         return cache->recvAtomic(pkt);
2288     }
2289 }
2290
2291 void
2292 BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt)
2293 {
2294     if (cache->system->bypassCaches()) {
2295         // The cache should be flushed if we are in cache bypass mode,
2296         // so we don't need to check if we need to update anything.
2297         cache->memSidePort.sendFunctional(pkt);
2298         return;
2299     }
2300
2301     // functional request
2302     cache->functionalAccess(pkt, true);
2303 }
2304
2305 AddrRangeList
2306 BaseCache::CpuSidePort::getAddrRanges() const
2307 {
2308     return cache->getAddrRanges();
2309 }
2310
2311
2312 BaseCache::
2313 CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache,
2314                          const std::string &_label)
2315     : CacheSlavePort(_name, _cache, _label), cache(_cache)
2316 {
2317 }
2318
2319 ///////////////
2320 //
2321 // MemSidePort
2322 //
2323 ///////////////
2324 bool
2325 BaseCache::MemSidePort::recvTimingResp(PacketPtr pkt)
2326 {
2327     cache->recvTimingResp(pkt);
2328     return true;
2329 }
2330
2331 // Express snooping requests to memside port
2332 void
2333 BaseCache::MemSidePort::recvTimingSnoopReq(PacketPtr pkt)
2334 {
2335     // Snoops shouldn't happen when bypassing caches
2336     assert(!cache->system->bypassCaches());
2337
2338     // handle snooping requests
2339     cache->recvTimingSnoopReq(pkt);
2340 }
2341
2342 Tick
2343 BaseCache::MemSidePort::recvAtomicSnoop(PacketPtr pkt)
2344 {
2345     // Snoops shouldn't happen when bypassing caches
2346     assert(!cache->system->bypassCaches());
2347
2348     return cache->recvAtomicSnoop(pkt);
2349 }
2350
2351 void
2352 BaseCache::MemSidePort::recvFunctionalSnoop(PacketPtr pkt)
2353 {
2354     // Snoops shouldn't happen when bypassing caches
2355     assert(!cache->system->bypassCaches());
2356
2357     // functional snoop (note that in contrast to atomic we don't have
2358     // a specific functionalSnoop method, as they have the same
2359     // behaviour regardless)
2360     cache->functionalAccess(pkt, false);
2361 }
2362
2363 void
2364 BaseCache::CacheReqPacketQueue::sendDeferredPacket()
2365 {
2366     // sanity check
2367     assert(!waitingOnRetry);
2368
2369     // there should never be any deferred request packets in the
2370     // queue, instead we resly on the cache to provide the packets
2371     // from the MSHR queue or write queue
2372     assert(deferredPacketReadyTime() == MaxTick);
2373
2374     // check for request packets (requests & writebacks)
2375     QueueEntry* entry = cache.getNextQueueEntry();
2376
2377     if (!entry) {
2378         // can happen if e.g. we attempt a writeback and fail, but
2379         // before the retry, the writeback is eliminated because
2380         // we snoop another cache's ReadEx.
2381     } else {
2382         // let our snoop responses go first if there are responses to
2383         // the same addresses
2384         if (checkConflictingSnoop(entry->blkAddr)) {
2385             return;
2386         }
2387         waitingOnRetry = entry->sendPacket(cache);
2388     }
2389
2390     // if we succeeded and are not waiting for a retry, schedule the
2391     // next send considering when the next queue is ready, note that
2392     // snoop responses have their own packet queue and thus schedule
2393     // their own events
2394     if (!waitingOnRetry) {
2395         schedSendEvent(cache.nextQueueReadyTime());
2396     }
2397 }
2398
2399 BaseCache::MemSidePort::MemSidePort(const std::string &_name,
2400                                     BaseCache *_cache,
2401                                     const std::string &_label)
2402     : CacheMasterPort(_name, _cache, _reqQueue, _snoopRespQueue),
2403       _reqQueue(*_cache, *this, _snoopRespQueue, _label),
2404       _snoopRespQueue(*_cache, *this, _label), cache(_cache)
2405 {
2406 }
2407
2408 void
2409 WriteAllocator::updateMode(Addr write_addr, unsigned write_size,
2410                            Addr blk_addr)
2411 {
2412     // check if we are continuing where the last write ended
2413     if (nextAddr == write_addr) {
2414         delayCtr[blk_addr] = delayThreshold;
2415         // stop if we have already saturated
2416         if (mode != WriteMode::NO_ALLOCATE) {
2417             byteCount += write_size;
2418             // switch to streaming mode if we have passed the lower
2419             // threshold
2420             if (mode == WriteMode::ALLOCATE &&
2421                 byteCount > coalesceLimit) {
2422                 mode = WriteMode::COALESCE;
2423                 DPRINTF(Cache, "Switched to write coalescing\n");
2424             } else if (mode == WriteMode::COALESCE &&
2425                        byteCount > noAllocateLimit) {
2426                 // and continue and switch to non-allocating mode if we
2427                 // pass the upper threshold
2428                 mode = WriteMode::NO_ALLOCATE;
2429                 DPRINTF(Cache, "Switched to write-no-allocate\n");
2430             }
2431         }
2432     } else {
2433         // we did not see a write matching the previous one, start
2434         // over again
2435         byteCount = write_size;
2436         mode = WriteMode::ALLOCATE;
2437         resetDelay(blk_addr);
2438     }
2439     nextAddr = write_addr + write_size;
2440 }
2441
2442 WriteAllocator*
2443 WriteAllocatorParams::create()
2444 {
2445     return new WriteAllocator(this);
2446 }