src/mem/cache/base.cc

   1 /*
   2  * Copyright (c) 2012-2013, 2018-2019 ARM Limited
   3  * All rights reserved.
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2003-2005 The Regents of The University of Michigan
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Erik Hallnor
  41  *          Nikos Nikoleris
  42  */
  43
  44 /**
  45  * @file
  46  * Definition of BaseCache functions.
  47  */
  48
  49 #include "mem/cache/base.hh"
  50
  51 #include "base/compiler.hh"
  52 #include "base/logging.hh"
  53 #include "debug/Cache.hh"
  54 #include "debug/CacheComp.hh"
  55 #include "debug/CachePort.hh"
  56 #include "debug/CacheRepl.hh"
  57 #include "debug/CacheVerbose.hh"
  58 #include "mem/cache/compressors/base.hh"
  59 #include "mem/cache/mshr.hh"
  60 #include "mem/cache/prefetch/base.hh"
  61 #include "mem/cache/queue_entry.hh"
  62 #include "mem/cache/tags/super_blk.hh"
  63 #include "params/BaseCache.hh"
  64 #include "params/WriteAllocator.hh"
  65 #include "sim/core.hh"
  66
  67 class BaseMasterPort;
  68 class BaseSlavePort;
  69
  70 using namespace std;
  71
  72 BaseCache::CacheSlavePort::CacheSlavePort(const std::string &_name,
  73                                           BaseCache *_cache,
  74                                           const std::string &_label)
  75     : QueuedSlavePort(_name, _cache, queue),
  76       queue(*_cache, *this, true, _label),
  77       blocked(false), mustSendRetry(false),
  78       sendRetryEvent([this]{ processSendRetry(); }, _name)
  79 {
  80 }
  81
  82 BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size)
  83     : ClockedObject(p),
  84       cpuSidePort (p->name + ".cpu_side", this, "CpuSidePort"),
  85       memSidePort(p->name + ".mem_side", this, "MemSidePort"),
  86       mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below
  87       writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below
  88       tags(p->tags),
  89       compressor(p->compressor),
  90       prefetcher(p->prefetcher),
  91       writeAllocator(p->write_allocator),
  92       writebackClean(p->writeback_clean),
  93       tempBlockWriteback(nullptr),
  94       writebackTempBlockAtomicEvent([this]{ writebackTempBlockAtomic(); },
  95                                     name(), false,
  96                                     EventBase::Delayed_Writeback_Pri),
  97       blkSize(blk_size),
  98       lookupLatency(p->tag_latency),
  99       dataLatency(p->data_latency),
 100       forwardLatency(p->tag_latency),
 101       fillLatency(p->data_latency),
 102       responseLatency(p->response_latency),
 103       sequentialAccess(p->sequential_access),
 104       numTarget(p->tgts_per_mshr),
 105       forwardSnoops(true),
 106       clusivity(p->clusivity),
 107       isReadOnly(p->is_read_only),
 108       blocked(0),
 109       order(0),
 110       noTargetMSHR(nullptr),
 111       missCount(p->max_miss_count),
 112       addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()),
 113       system(p->system)
 114 {
 115     // the MSHR queue has no reserve entries as we check the MSHR
 116     // queue on every single allocation, whereas the write queue has
 117     // as many reserve entries as we have MSHRs, since every MSHR may
 118     // eventually require a writeback, and we do not check the write
 119     // buffer before committing to an MSHR
 120
 121     // forward snoops is overridden in init() once we can query
 122     // whether the connected master is actually snooping or not
 123
 124     tempBlock = new TempCacheBlk(blkSize);
 125
 126     tags->tagsInit();
 127     if (prefetcher)
 128         prefetcher->setCache(this);
 129 }
 130
 131 BaseCache::~BaseCache()
 132 {
 133     delete tempBlock;
 134 }
 135
 136 void
 137 BaseCache::CacheSlavePort::setBlocked()
 138 {
 139     assert(!blocked);
 140     DPRINTF(CachePort, "Port is blocking new requests\n");
 141     blocked = true;
 142     // if we already scheduled a retry in this cycle, but it has not yet
 143     // happened, cancel it
 144     if (sendRetryEvent.scheduled()) {
 145         owner.deschedule(sendRetryEvent);
 146         DPRINTF(CachePort, "Port descheduled retry\n");
 147         mustSendRetry = true;
 148     }
 149 }
 150
 151 void
 152 BaseCache::CacheSlavePort::clearBlocked()
 153 {
 154     assert(blocked);
 155     DPRINTF(CachePort, "Port is accepting new requests\n");
 156     blocked = false;
 157     if (mustSendRetry) {
 158         // @TODO: need to find a better time (next cycle?)
 159         owner.schedule(sendRetryEvent, curTick() + 1);
 160     }
 161 }
 162
 163 void
 164 BaseCache::CacheSlavePort::processSendRetry()
 165 {
 166     DPRINTF(CachePort, "Port is sending retry\n");
 167
 168     // reset the flag and call retry
 169     mustSendRetry = false;
 170     sendRetryReq();
 171 }
 172
 173 Addr
 174 BaseCache::regenerateBlkAddr(CacheBlk* blk)
 175 {
 176     if (blk != tempBlock) {
 177         return tags->regenerateBlkAddr(blk);
 178     } else {
 179         return tempBlock->getAddr();
 180     }
 181 }
 182
 183 void
 184 BaseCache::init()
 185 {
 186     if (!cpuSidePort.isConnected() || !memSidePort.isConnected())
 187         fatal("Cache ports on %s are not connected\n", name());
 188     cpuSidePort.sendRangeChange();
 189     forwardSnoops = cpuSidePort.isSnooping();
 190 }
 191
 192 Port &
 193 BaseCache::getPort(const std::string &if_name, PortID idx)
 194 {
 195     if (if_name == "mem_side") {
 196         return memSidePort;
 197     } else if (if_name == "cpu_side") {
 198         return cpuSidePort;
 199     }  else {
 200         return ClockedObject::getPort(if_name, idx);
 201     }
 202 }
 203
 204 bool
 205 BaseCache::inRange(Addr addr) const
 206 {
 207     for (const auto& r : addrRanges) {
 208         if (r.contains(addr)) {
 209             return true;
 210        }
 211     }
 212     return false;
 213 }
 214
 215 void
 216 BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time)
 217 {
 218     if (pkt->needsResponse()) {
 219         // These delays should have been consumed by now
 220         assert(pkt->headerDelay == 0);
 221         assert(pkt->payloadDelay == 0);
 222
 223         pkt->makeTimingResponse();
 224
 225         // In this case we are considering request_time that takes
 226         // into account the delay of the xbar, if any, and just
 227         // lat, neglecting responseLatency, modelling hit latency
 228         // just as the value of lat overriden by access(), which calls
 229         // the calculateAccessLatency() function.
 230         cpuSidePort.schedTimingResp(pkt, request_time);
 231     } else {
 232         DPRINTF(Cache, "%s satisfied %s, no response needed\n", __func__,
 233                 pkt->print());
 234
 235         // queue the packet for deletion, as the sending cache is
 236         // still relying on it; if the block is found in access(),
 237         // CleanEvict and Writeback messages will be deleted
 238         // here as well
 239         pendingDelete.reset(pkt);
 240     }
 241 }
 242
 243 void
 244 BaseCache::handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
 245                                Tick forward_time, Tick request_time)
 246 {
 247     if (writeAllocator &&
 248         pkt && pkt->isWrite() && !pkt->req->isUncacheable()) {
 249         writeAllocator->updateMode(pkt->getAddr(), pkt->getSize(),
 250                                    pkt->getBlockAddr(blkSize));
 251     }
 252
 253     if (mshr) {
 254         /// MSHR hit
 255         /// @note writebacks will be checked in getNextMSHR()
 256         /// for any conflicting requests to the same block
 257
 258         //@todo remove hw_pf here
 259
 260         // Coalesce unless it was a software prefetch (see above).
 261         if (pkt) {
 262             assert(!pkt->isWriteback());
 263             // CleanEvicts corresponding to blocks which have
 264             // outstanding requests in MSHRs are simply sunk here
 265             if (pkt->cmd == MemCmd::CleanEvict) {
 266                 pendingDelete.reset(pkt);
 267             } else if (pkt->cmd == MemCmd::WriteClean) {
 268                 // A WriteClean should never coalesce with any
 269                 // outstanding cache maintenance requests.
 270
 271                 // We use forward_time here because there is an
 272                 // uncached memory write, forwarded to WriteBuffer.
 273                 allocateWriteBuffer(pkt, forward_time);
 274             } else {
 275                 DPRINTF(Cache, "%s coalescing MSHR for %s\n", __func__,
 276                         pkt->print());
 277
 278                 assert(pkt->req->masterId() < system->maxMasters());
 279                 mshr_hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
 280
 281                 // We use forward_time here because it is the same
 282                 // considering new targets. We have multiple
 283                 // requests for the same address here. It
 284                 // specifies the latency to allocate an internal
 285                 // buffer and to schedule an event to the queued
 286                 // port and also takes into account the additional
 287                 // delay of the xbar.
 288                 mshr->allocateTarget(pkt, forward_time, order++,
 289                                      allocOnFill(pkt->cmd));
 290                 if (mshr->getNumTargets() == numTarget) {
 291                     noTargetMSHR = mshr;
 292                     setBlocked(Blocked_NoTargets);
 293                     // need to be careful with this... if this mshr isn't
 294                     // ready yet (i.e. time > curTick()), we don't want to
 295                     // move it ahead of mshrs that are ready
 296                     // mshrQueue.moveToFront(mshr);
 297                 }
 298             }
 299         }
 300     } else {
 301         // no MSHR
 302         assert(pkt->req->masterId() < system->maxMasters());
 303         mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
 304
 305         if (pkt->isEviction() || pkt->cmd == MemCmd::WriteClean) {
 306             // We use forward_time here because there is an
 307             // writeback or writeclean, forwarded to WriteBuffer.
 308             allocateWriteBuffer(pkt, forward_time);
 309         } else {
 310             if (blk && blk->isValid()) {
 311                 // If we have a write miss to a valid block, we
 312                 // need to mark the block non-readable.  Otherwise
 313                 // if we allow reads while there's an outstanding
 314                 // write miss, the read could return stale data
 315                 // out of the cache block... a more aggressive
 316                 // system could detect the overlap (if any) and
 317                 // forward data out of the MSHRs, but we don't do
 318                 // that yet.  Note that we do need to leave the
 319                 // block valid so that it stays in the cache, in
 320                 // case we get an upgrade response (and hence no
 321                 // new data) when the write miss completes.
 322                 // As long as CPUs do proper store/load forwarding
 323                 // internally, and have a sufficiently weak memory
 324                 // model, this is probably unnecessary, but at some
 325                 // point it must have seemed like we needed it...
 326                 assert((pkt->needsWritable() && !blk->isWritable()) ||
 327                        pkt->req->isCacheMaintenance());
 328                 blk->status &= ~BlkReadable;
 329             }
 330             // Here we are using forward_time, modelling the latency of
 331             // a miss (outbound) just as forwardLatency, neglecting the
 332             // lookupLatency component.
 333             allocateMissBuffer(pkt, forward_time);
 334         }
 335     }
 336 }
 337
 338 void
 339 BaseCache::recvTimingReq(PacketPtr pkt)
 340 {
 341     // anything that is merely forwarded pays for the forward latency and
 342     // the delay provided by the crossbar
 343     Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;
 344
 345     // Note that lat is passed by reference here. The function
 346     // access() will set the lat value.
 347     Cycles lat;
 348     CacheBlk *blk = nullptr;
 349     bool satisfied = access(pkt, blk, lat);
 350
 351     // Here we charge the headerDelay that takes into account the latencies
 352     // of the bus, if the packet comes from it.
 353     // The latency charged is just the value set by the access() function.
 354     // In case of a hit we are neglecting response latency.
 355     // In case of a miss we are neglecting forward latency.
 356     Tick request_time = clockEdge(lat);
 357     // Here we reset the timing of the packet.
 358     pkt->headerDelay = pkt->payloadDelay = 0;
 359
 360     if (satisfied) {
 361         // notify before anything else as later handleTimingReqHit might turn
 362         // the packet in a response
 363         ppHit->notify(pkt);
 364
 365         if (prefetcher && blk && blk->wasPrefetched()) {
 366             blk->status &= ~BlkHWPrefetched;
 367         }
 368
 369         handleTimingReqHit(pkt, blk, request_time);
 370     } else {
 371         handleTimingReqMiss(pkt, blk, forward_time, request_time);
 372
 373         ppMiss->notify(pkt);
 374     }
 375
 376     if (prefetcher) {
 377         // track time of availability of next prefetch, if any
 378         Tick next_pf_time = prefetcher->nextPrefetchReadyTime();
 379         if (next_pf_time != MaxTick) {
 380             schedMemSideSendEvent(next_pf_time);
 381         }
 382     }
 383 }
 384
 385 void
 386 BaseCache::handleUncacheableWriteResp(PacketPtr pkt)
 387 {
 388     Tick completion_time = clockEdge(responseLatency) +
 389         pkt->headerDelay + pkt->payloadDelay;
 390
 391     // Reset the bus additional time as it is now accounted for
 392     pkt->headerDelay = pkt->payloadDelay = 0;
 393
 394     cpuSidePort.schedTimingResp(pkt, completion_time);
 395 }
 396
 397 void
 398 BaseCache::recvTimingResp(PacketPtr pkt)
 399 {
 400     assert(pkt->isResponse());
 401
 402     // all header delay should be paid for by the crossbar, unless
 403     // this is a prefetch response from above
 404     panic_if(pkt->headerDelay != 0 && pkt->cmd != MemCmd::HardPFResp,
 405              "%s saw a non-zero packet delay\n", name());
 406
 407     const bool is_error = pkt->isError();
 408
 409     if (is_error) {
 410         DPRINTF(Cache, "%s: Cache received %s with error\n", __func__,
 411                 pkt->print());
 412     }
 413
 414     DPRINTF(Cache, "%s: Handling response %s\n", __func__,
 415             pkt->print());
 416
 417     // if this is a write, we should be looking at an uncacheable
 418     // write
 419     if (pkt->isWrite()) {
 420         assert(pkt->req->isUncacheable());
 421         handleUncacheableWriteResp(pkt);
 422         return;
 423     }
 424
 425     // we have dealt with any (uncacheable) writes above, from here on
 426     // we know we are dealing with an MSHR due to a miss or a prefetch
 427     MSHR *mshr = dynamic_cast<MSHR*>(pkt->popSenderState());
 428     assert(mshr);
 429
 430     if (mshr == noTargetMSHR) {
 431         // we always clear at least one target
 432         clearBlocked(Blocked_NoTargets);
 433         noTargetMSHR = nullptr;
 434     }
 435
 436     // Initial target is used just for stats
 437     QueueEntry::Target *initial_tgt = mshr->getTarget();
 438     int stats_cmd_idx = initial_tgt->pkt->cmdToIndex();
 439     Tick miss_latency = curTick() - initial_tgt->recvTime;
 440
 441     if (pkt->req->isUncacheable()) {
 442         assert(pkt->req->masterId() < system->maxMasters());
 443         mshr_uncacheable_lat[stats_cmd_idx][pkt->req->masterId()] +=
 444             miss_latency;
 445     } else {
 446         assert(pkt->req->masterId() < system->maxMasters());
 447         mshr_miss_latency[stats_cmd_idx][pkt->req->masterId()] +=
 448             miss_latency;
 449     }
 450
 451     bool is_fill = !mshr->isForward &&
 452         (pkt->isRead() || pkt->cmd == MemCmd::UpgradeResp ||
 453          mshr->wasWholeLineWrite);
 454
 455     // make sure that if the mshr was due to a whole line write then
 456     // the response is an invalidation
 457     assert(!mshr->wasWholeLineWrite || pkt->isInvalidate());
 458
 459     CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure());
 460
 461     if (is_fill && !is_error) {
 462         DPRINTF(Cache, "Block for addr %#llx being updated in Cache\n",
 463                 pkt->getAddr());
 464
 465         const bool allocate = (writeAllocator && mshr->wasWholeLineWrite) ?
 466             writeAllocator->allocate() : mshr->allocOnFill();
 467         blk = handleFill(pkt, blk, allocate);
 468         assert(blk != nullptr);
 469         ppFill->notify(pkt);
 470     }
 471
 472     if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
 473         // The block was marked not readable while there was a pending
 474         // cache maintenance operation, restore its flag.
 475         blk->status |= BlkReadable;
 476
 477         // This was a cache clean operation (without invalidate)
 478         // and we have a copy of the block already. Since there
 479         // is no invalidation, we can promote targets that don't
 480         // require a writable copy
 481         mshr->promoteReadable();
 482     }
 483
 484     if (blk && blk->isWritable() && !pkt->req->isCacheInvalidate()) {
 485         // If at this point the referenced block is writable and the
 486         // response is not a cache invalidate, we promote targets that
 487         // were deferred as we couldn't guarrantee a writable copy
 488         mshr->promoteWritable();
 489     }
 490
 491     serviceMSHRTargets(mshr, pkt, blk);
 492
 493     if (mshr->promoteDeferredTargets()) {
 494         // avoid later read getting stale data while write miss is
 495         // outstanding.. see comment in timingAccess()
 496         if (blk) {
 497             blk->status &= ~BlkReadable;
 498         }
 499         mshrQueue.markPending(mshr);
 500         schedMemSideSendEvent(clockEdge() + pkt->payloadDelay);
 501     } else {
 502         // while we deallocate an mshr from the queue we still have to
 503         // check the isFull condition before and after as we might
 504         // have been using the reserved entries already
 505         const bool was_full = mshrQueue.isFull();
 506         mshrQueue.deallocate(mshr);
 507         if (was_full && !mshrQueue.isFull()) {
 508             clearBlocked(Blocked_NoMSHRs);
 509         }
 510
 511         // Request the bus for a prefetch if this deallocation freed enough
 512         // MSHRs for a prefetch to take place
 513         if (prefetcher && mshrQueue.canPrefetch()) {
 514             Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(),
 515                                          clockEdge());
 516             if (next_pf_time != MaxTick)
 517                 schedMemSideSendEvent(next_pf_time);
 518         }
 519     }
 520
 521     // if we used temp block, check to see if its valid and then clear it out
 522     if (blk == tempBlock && tempBlock->isValid()) {
 523         evictBlock(blk, clockEdge(forwardLatency) + pkt->headerDelay);
 524     }
 525
 526     DPRINTF(CacheVerbose, "%s: Leaving with %s\n", __func__, pkt->print());
 527     delete pkt;
 528 }
 529
 530
 531 Tick
 532 BaseCache::recvAtomic(PacketPtr pkt)
 533 {
 534     // should assert here that there are no outstanding MSHRs or
 535     // writebacks... that would mean that someone used an atomic
 536     // access in timing mode
 537
 538     // We use lookupLatency here because it is used to specify the latency
 539     // to access.
 540     Cycles lat = lookupLatency;
 541
 542     CacheBlk *blk = nullptr;
 543     bool satisfied = access(pkt, blk, lat);
 544
 545     if (pkt->isClean() && blk && blk->isDirty()) {
 546         // A cache clean opearation is looking for a dirty
 547         // block. If a dirty block is encountered a WriteClean
 548         // will update any copies to the path to the memory
 549         // until the point of reference.
 550         DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
 551                 __func__, pkt->print(), blk->print());
 552         PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(), pkt->id);
 553         pkt->setSatisfied();
 554         doWritebacksAtomic(wb_pkt);
 555     }
 556
 557     if (!satisfied) {
 558         lat += handleAtomicReqMiss(pkt, blk);
 559     }
 560
 561     // Note that we don't invoke the prefetcher at all in atomic mode.
 562     // It's not clear how to do it properly, particularly for
 563     // prefetchers that aggressively generate prefetch candidates and
 564     // rely on bandwidth contention to throttle them; these will tend
 565     // to pollute the cache in atomic mode since there is no bandwidth
 566     // contention.  If we ever do want to enable prefetching in atomic
 567     // mode, though, this is the place to do it... see timingAccess()
 568     // for an example (though we'd want to issue the prefetch(es)
 569     // immediately rather than calling requestMemSideBus() as we do
 570     // there).
 571
 572     // if we used temp block, check to see if its valid and if so
 573     // clear it out, but only do so after the call to recvAtomic is
 574     // finished so that any downstream observers (such as a snoop
 575     // filter), first see the fill, and only then see the eviction
 576     if (blk == tempBlock && tempBlock->isValid()) {
 577         // the atomic CPU calls recvAtomic for fetch and load/store
 578         // sequentuially, and we may already have a tempBlock
 579         // writeback from the fetch that we have not yet sent
 580         if (tempBlockWriteback) {
 581             // if that is the case, write the prevoius one back, and
 582             // do not schedule any new event
 583             writebackTempBlockAtomic();
 584         } else {
 585             // the writeback/clean eviction happens after the call to
 586             // recvAtomic has finished (but before any successive
 587             // calls), so that the response handling from the fill is
 588             // allowed to happen first
 589             schedule(writebackTempBlockAtomicEvent, curTick());
 590         }
 591
 592         tempBlockWriteback = evictBlock(blk);
 593     }
 594
 595     if (pkt->needsResponse()) {
 596         pkt->makeAtomicResponse();
 597     }
 598
 599     return lat * clockPeriod();
 600 }
 601
 602 void
 603 BaseCache::functionalAccess(PacketPtr pkt, bool from_cpu_side)
 604 {
 605     Addr blk_addr = pkt->getBlockAddr(blkSize);
 606     bool is_secure = pkt->isSecure();
 607     CacheBlk *blk = tags->findBlock(pkt->getAddr(), is_secure);
 608     MSHR *mshr = mshrQueue.findMatch(blk_addr, is_secure);
 609
 610     pkt->pushLabel(name());
 611
 612     CacheBlkPrintWrapper cbpw(blk);
 613
 614     // Note that just because an L2/L3 has valid data doesn't mean an
 615     // L1 doesn't have a more up-to-date modified copy that still
 616     // needs to be found.  As a result we always update the request if
 617     // we have it, but only declare it satisfied if we are the owner.
 618
 619     // see if we have data at all (owned or otherwise)
 620     bool have_data = blk && blk->isValid()
 621         && pkt->trySatisfyFunctional(&cbpw, blk_addr, is_secure, blkSize,
 622                                      blk->data);
 623
 624     // data we have is dirty if marked as such or if we have an
 625     // in-service MSHR that is pending a modified line
 626     bool have_dirty =
 627         have_data && (blk->isDirty() ||
 628                       (mshr && mshr->inService && mshr->isPendingModified()));
 629
 630     bool done = have_dirty ||
 631         cpuSidePort.trySatisfyFunctional(pkt) ||
 632         mshrQueue.trySatisfyFunctional(pkt) ||
 633         writeBuffer.trySatisfyFunctional(pkt) ||
 634         memSidePort.trySatisfyFunctional(pkt);
 635
 636     DPRINTF(CacheVerbose, "%s: %s %s%s%s\n", __func__,  pkt->print(),
 637             (blk && blk->isValid()) ? "valid " : "",
 638             have_data ? "data " : "", done ? "done " : "");
 639
 640     // We're leaving the cache, so pop cache->name() label
 641     pkt->popLabel();
 642
 643     if (done) {
 644         pkt->makeResponse();
 645     } else {
 646         // if it came as a request from the CPU side then make sure it
 647         // continues towards the memory side
 648         if (from_cpu_side) {
 649             memSidePort.sendFunctional(pkt);
 650         } else if (cpuSidePort.isSnooping()) {
 651             // if it came from the memory side, it must be a snoop request
 652             // and we should only forward it if we are forwarding snoops
 653             cpuSidePort.sendFunctionalSnoop(pkt);
 654         }
 655     }
 656 }
 657
 658
 659 void
 660 BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt)
 661 {
 662     assert(pkt->isRequest());
 663
 664     uint64_t overwrite_val;
 665     bool overwrite_mem;
 666     uint64_t condition_val64;
 667     uint32_t condition_val32;
 668
 669     int offset = pkt->getOffset(blkSize);
 670     uint8_t *blk_data = blk->data + offset;
 671
 672     assert(sizeof(uint64_t) >= pkt->getSize());
 673
 674     overwrite_mem = true;
 675     // keep a copy of our possible write value, and copy what is at the
 676     // memory address into the packet
 677     pkt->writeData((uint8_t *)&overwrite_val);
 678     pkt->setData(blk_data);
 679
 680     if (pkt->req->isCondSwap()) {
 681         if (pkt->getSize() == sizeof(uint64_t)) {
 682             condition_val64 = pkt->req->getExtraData();
 683             overwrite_mem = !std::memcmp(&condition_val64, blk_data,
 684                                          sizeof(uint64_t));
 685         } else if (pkt->getSize() == sizeof(uint32_t)) {
 686             condition_val32 = (uint32_t)pkt->req->getExtraData();
 687             overwrite_mem = !std::memcmp(&condition_val32, blk_data,
 688                                          sizeof(uint32_t));
 689         } else
 690             panic("Invalid size for conditional read/write\n");
 691     }
 692
 693     if (overwrite_mem) {
 694         std::memcpy(blk_data, &overwrite_val, pkt->getSize());
 695         blk->status |= BlkDirty;
 696     }
 697 }
 698
 699 QueueEntry*
 700 BaseCache::getNextQueueEntry()
 701 {
 702     // Check both MSHR queue and write buffer for potential requests,
 703     // note that null does not mean there is no request, it could
 704     // simply be that it is not ready
 705     MSHR *miss_mshr  = mshrQueue.getNext();
 706     WriteQueueEntry *wq_entry = writeBuffer.getNext();
 707
 708     // If we got a write buffer request ready, first priority is a
 709     // full write buffer, otherwise we favour the miss requests
 710     if (wq_entry && (writeBuffer.isFull() || !miss_mshr)) {
 711         // need to search MSHR queue for conflicting earlier miss.
 712         MSHR *conflict_mshr = mshrQueue.findPending(wq_entry);
 713
 714         if (conflict_mshr && conflict_mshr->order < wq_entry->order) {
 715             // Service misses in order until conflict is cleared.
 716             return conflict_mshr;
 717
 718             // @todo Note that we ignore the ready time of the conflict here
 719         }
 720
 721         // No conflicts; issue write
 722         return wq_entry;
 723     } else if (miss_mshr) {
 724         // need to check for conflicting earlier writeback
 725         WriteQueueEntry *conflict_mshr = writeBuffer.findPending(miss_mshr);
 726         if (conflict_mshr) {
 727             // not sure why we don't check order here... it was in the
 728             // original code but commented out.
 729
 730             // The only way this happens is if we are
 731             // doing a write and we didn't have permissions
 732             // then subsequently saw a writeback (owned got evicted)
 733             // We need to make sure to perform the writeback first
 734             // To preserve the dirty data, then we can issue the write
 735
 736             // should we return wq_entry here instead?  I.e. do we
 737             // have to flush writes in order?  I don't think so... not
 738             // for Alpha anyway.  Maybe for x86?
 739             return conflict_mshr;
 740
 741             // @todo Note that we ignore the ready time of the conflict here
 742         }
 743
 744         // No conflicts; issue read
 745         return miss_mshr;
 746     }
 747
 748     // fall through... no pending requests.  Try a prefetch.
 749     assert(!miss_mshr && !wq_entry);
 750     if (prefetcher && mshrQueue.canPrefetch()) {
 751         // If we have a miss queue slot, we can try a prefetch
 752         PacketPtr pkt = prefetcher->getPacket();
 753         if (pkt) {
 754             Addr pf_addr = pkt->getBlockAddr(blkSize);
 755             if (!tags->findBlock(pf_addr, pkt->isSecure()) &&
 756                 !mshrQueue.findMatch(pf_addr, pkt->isSecure()) &&
 757                 !writeBuffer.findMatch(pf_addr, pkt->isSecure())) {
 758                 // Update statistic on number of prefetches issued
 759                 // (hwpf_mshr_misses)
 760                 assert(pkt->req->masterId() < system->maxMasters());
 761                 mshr_misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
 762
 763                 // allocate an MSHR and return it, note
 764                 // that we send the packet straight away, so do not
 765                 // schedule the send
 766                 return allocateMissBuffer(pkt, curTick(), false);
 767             } else {
 768                 // free the request and packet
 769                 delete pkt;
 770             }
 771         }
 772     }
 773
 774     return nullptr;
 775 }
 776
 777 bool
 778 BaseCache::updateCompressionData(CacheBlk *blk, const uint64_t* data,
 779     uint32_t delay, Cycles tag_latency)
 780 {
 781     // tempBlock does not exist in the tags, so don't do anything for it.
 782     if (blk == tempBlock) {
 783         return true;
 784     }
 785
 786     // Get superblock of the given block
 787     CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk);
 788     const SuperBlk* superblock = static_cast<const SuperBlk*>(
 789         compression_blk->getSectorBlock());
 790
 791     // The compressor is called to compress the updated data, so that its
 792     // metadata can be updated.
 793     std::size_t compression_size = 0;
 794     Cycles compression_lat = Cycles(0);
 795     Cycles decompression_lat = Cycles(0);
 796     compressor->compress(data, compression_lat, decompression_lat,
 797                          compression_size);
 798
 799     // If block's compression factor increased, it may not be co-allocatable
 800     // anymore. If so, some blocks might need to be evicted to make room for
 801     // the bigger block
 802
 803     // Get previous compressed size
 804     const std::size_t M5_VAR_USED prev_size = compression_blk->getSizeBits();
 805
 806     // Check if new data is co-allocatable
 807     const bool is_co_allocatable = superblock->isCompressed(compression_blk) &&
 808         superblock->canCoAllocate(compression_size);
 809
 810     // If block was compressed, possibly co-allocated with other blocks, and
 811     // cannot be co-allocated anymore, one or more blocks must be evicted to
 812     // make room for the expanded block. As of now we decide to evict the co-
 813     // allocated blocks to make room for the expansion, but other approaches
 814     // that take the replacement data of the superblock into account may
 815     // generate better results
 816     std::vector<CacheBlk*> evict_blks;
 817     const bool was_compressed = compression_blk->isCompressed();
 818     if (was_compressed && !is_co_allocatable) {
 819         // Get all co-allocated blocks
 820         for (const auto& sub_blk : superblock->blks) {
 821             if (sub_blk->isValid() && (compression_blk != sub_blk)) {
 822                 // Check for transient state allocations. If any of the
 823                 // entries listed for eviction has a transient state, the
 824                 // allocation fails
 825                 const Addr repl_addr = regenerateBlkAddr(sub_blk);
 826                 const MSHR *repl_mshr =
 827                     mshrQueue.findMatch(repl_addr, sub_blk->isSecure());
 828                 if (repl_mshr) {
 829                     DPRINTF(CacheRepl, "Aborting data expansion of %s due " \
 830                             "to replacement of block in transient state: %s\n",
 831                             compression_blk->print(), sub_blk->print());
 832                     // Too hard to replace block with transient state, so it
 833                     // cannot be evicted. Mark the update as failed and expect
 834                     // the caller to evict this block. Since this is called
 835                     // only when writebacks arrive, and packets do not contain
 836                     // compressed data, there is no need to decompress
 837                     compression_blk->setSizeBits(blkSize * 8);
 838                     compression_blk->setDecompressionLatency(Cycles(0));
 839                     compression_blk->setUncompressed();
 840                     return false;
 841                 }
 842
 843                 evict_blks.push_back(sub_blk);
 844             }
 845         }
 846
 847         // Update the number of data expansions
 848         dataExpansions++;
 849
 850         DPRINTF(CacheComp, "Data expansion: expanding [%s] from %d to %d bits"
 851                 "\n", blk->print(), prev_size, compression_size);
 852     }
 853
 854     // We always store compressed blocks when possible
 855     if (is_co_allocatable) {
 856         compression_blk->setCompressed();
 857     } else {
 858         compression_blk->setUncompressed();
 859     }
 860     compression_blk->setSizeBits(compression_size);
 861     compression_blk->setDecompressionLatency(decompression_lat);
 862
 863     // Evict valid blocks
 864     for (const auto& evict_blk : evict_blks) {
 865         if (evict_blk->isValid()) {
 866             if (evict_blk->wasPrefetched()) {
 867                 unusedPrefetches++;
 868             }
 869             Cycles lat = calculateAccessLatency(evict_blk, delay, tag_latency);
 870             evictBlock(evict_blk, clockEdge(lat + forwardLatency));
 871         }
 872     }
 873
 874     return true;
 875 }
 876
 877 void
 878 BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
 879 {
 880     assert(pkt->isRequest());
 881
 882     assert(blk && blk->isValid());
 883     // Occasionally this is not true... if we are a lower-level cache
 884     // satisfying a string of Read and ReadEx requests from
 885     // upper-level caches, a Read will mark the block as shared but we
 886     // can satisfy a following ReadEx anyway since we can rely on the
 887     // Read requester(s) to have buffered the ReadEx snoop and to
 888     // invalidate their blocks after receiving them.
 889     // assert(!pkt->needsWritable() || blk->isWritable());
 890     assert(pkt->getOffset(blkSize) + pkt->getSize() <= blkSize);
 891
 892     // Check RMW operations first since both isRead() and
 893     // isWrite() will be true for them
 894     if (pkt->cmd == MemCmd::SwapReq) {
 895         if (pkt->isAtomicOp()) {
 896             // extract data from cache and save it into the data field in
 897             // the packet as a return value from this atomic op
 898             int offset = tags->extractBlkOffset(pkt->getAddr());
 899             uint8_t *blk_data = blk->data + offset;
 900             pkt->setData(blk_data);
 901
 902             // execute AMO operation
 903             (*(pkt->getAtomicOp()))(blk_data);
 904
 905             // set block status to dirty
 906             blk->status |= BlkDirty;
 907         } else {
 908             cmpAndSwap(blk, pkt);
 909         }
 910     } else if (pkt->isWrite()) {
 911         // we have the block in a writable state and can go ahead,
 912         // note that the line may be also be considered writable in
 913         // downstream caches along the path to memory, but always
 914         // Exclusive, and never Modified
 915         assert(blk->isWritable());
 916         // Write or WriteLine at the first cache with block in writable state
 917         if (blk->checkWrite(pkt)) {
 918             pkt->writeDataToBlock(blk->data, blkSize);
 919         }
 920         // Always mark the line as dirty (and thus transition to the
 921         // Modified state) even if we are a failed StoreCond so we
 922         // supply data to any snoops that have appended themselves to
 923         // this cache before knowing the store will fail.
 924         blk->status |= BlkDirty;
 925         DPRINTF(CacheVerbose, "%s for %s (write)\n", __func__, pkt->print());
 926     } else if (pkt->isRead()) {
 927         if (pkt->isLLSC()) {
 928             blk->trackLoadLocked(pkt);
 929         }
 930
 931         // all read responses have a data payload
 932         assert(pkt->hasRespData());
 933         pkt->setDataFromBlock(blk->data, blkSize);
 934     } else if (pkt->isUpgrade()) {
 935         // sanity check
 936         assert(!pkt->hasSharers());
 937
 938         if (blk->isDirty()) {
 939             // we were in the Owned state, and a cache above us that
 940             // has the line in Shared state needs to be made aware
 941             // that the data it already has is in fact dirty
 942             pkt->setCacheResponding();
 943             blk->status &= ~BlkDirty;
 944         }
 945     } else if (pkt->isClean()) {
 946         blk->status &= ~BlkDirty;
 947     } else {
 948         assert(pkt->isInvalidate());
 949         invalidateBlock(blk);
 950         DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__,
 951                 pkt->print());
 952     }
 953 }
 954
 955 /////////////////////////////////////////////////////
 956 //
 957 // Access path: requests coming in from the CPU side
 958 //
 959 /////////////////////////////////////////////////////
 960 Cycles
 961 BaseCache::calculateTagOnlyLatency(const uint32_t delay,
 962                                    const Cycles lookup_lat) const
 963 {
 964     // A tag-only access has to wait for the packet to arrive in order to
 965     // perform the tag lookup.
 966     return ticksToCycles(delay) + lookup_lat;
 967 }
 968
 969 Cycles
 970 BaseCache::calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
 971                                   const Cycles lookup_lat) const
 972 {
 973     Cycles lat(0);
 974
 975     if (blk != nullptr) {
 976         // As soon as the access arrives, for sequential accesses first access
 977         // tags, then the data entry. In the case of parallel accesses the
 978         // latency is dictated by the slowest of tag and data latencies.
 979         if (sequentialAccess) {
 980             lat = ticksToCycles(delay) + lookup_lat + dataLatency;
 981         } else {
 982             lat = ticksToCycles(delay) + std::max(lookup_lat, dataLatency);
 983         }
 984
 985         // Check if the block to be accessed is available. If not, apply the
 986         // access latency on top of when the block is ready to be accessed.
 987         const Tick tick = curTick() + delay;
 988         const Tick when_ready = blk->getWhenReady();
 989         if (when_ready > tick &&
 990             ticksToCycles(when_ready - tick) > lat) {
 991             lat += ticksToCycles(when_ready - tick);
 992         }
 993     } else {
 994         // In case of a miss, we neglect the data access in a parallel
 995         // configuration (i.e., the data access will be stopped as soon as
 996         // we find out it is a miss), and use the tag-only latency.
 997         lat = calculateTagOnlyLatency(delay, lookup_lat);
 998     }
 999
1000     return lat;
1001 }
1002
1003 bool
1004 BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat)
1005 {
1006     // sanity check
1007     assert(pkt->isRequest());
1008
1009     chatty_assert(!(isReadOnly && pkt->isWrite()),
1010                   "Should never see a write in a read-only cache %s\n",
1011                   name());
1012
1013     // Access block in the tags
1014     Cycles tag_latency(0);
1015     blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency);
1016
1017     DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
1018             blk ? "hit " + blk->print() : "miss");
1019
1020     if (pkt->req->isCacheMaintenance()) {
1021         // A cache maintenance operation is always forwarded to the
1022         // memory below even if the block is found in dirty state.
1023
1024         // We defer any changes to the state of the block until we
1025         // create and mark as in service the mshr for the downstream
1026         // packet.
1027
1028         // Calculate access latency on top of when the packet arrives. This
1029         // takes into account the bus delay.
1030         lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1031
1032         return false;
1033     }
1034
1035     if (pkt->isEviction()) {
1036         // We check for presence of block in above caches before issuing
1037         // Writeback or CleanEvict to write buffer. Therefore the only
1038         // possible cases can be of a CleanEvict packet coming from above
1039         // encountering a Writeback generated in this cache peer cache and
1040         // waiting in the write buffer. Cases of upper level peer caches
1041         // generating CleanEvict and Writeback or simply CleanEvict and
1042         // CleanEvict almost simultaneously will be caught by snoops sent out
1043         // by crossbar.
1044         WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(),
1045                                                           pkt->isSecure());
1046         if (wb_entry) {
1047             assert(wb_entry->getNumTargets() == 1);
1048             PacketPtr wbPkt = wb_entry->getTarget()->pkt;
1049             assert(wbPkt->isWriteback());
1050
1051             if (pkt->isCleanEviction()) {
1052                 // The CleanEvict and WritebackClean snoops into other
1053                 // peer caches of the same level while traversing the
1054                 // crossbar. If a copy of the block is found, the
1055                 // packet is deleted in the crossbar. Hence, none of
1056                 // the other upper level caches connected to this
1057                 // cache have the block, so we can clear the
1058                 // BLOCK_CACHED flag in the Writeback if set and
1059                 // discard the CleanEvict by returning true.
1060                 wbPkt->clearBlockCached();
1061
1062                 // A clean evict does not need to access the data array
1063                 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1064
1065                 return true;
1066             } else {
1067                 assert(pkt->cmd == MemCmd::WritebackDirty);
1068                 // Dirty writeback from above trumps our clean
1069                 // writeback... discard here
1070                 // Note: markInService will remove entry from writeback buffer.
1071                 markInService(wb_entry);
1072                 delete wbPkt;
1073             }
1074         }
1075     }
1076
1077     // Writeback handling is special case.  We can write the block into
1078     // the cache without having a writeable copy (or any copy at all).
1079     if (pkt->isWriteback()) {
1080         assert(blkSize == pkt->getSize());
1081
1082         // we could get a clean writeback while we are having
1083         // outstanding accesses to a block, do the simple thing for
1084         // now and drop the clean writeback so that we do not upset
1085         // any ordering/decisions about ownership already taken
1086         if (pkt->cmd == MemCmd::WritebackClean &&
1087             mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure())) {
1088             DPRINTF(Cache, "Clean writeback %#llx to block with MSHR, "
1089                     "dropping\n", pkt->getAddr());
1090
1091             // A writeback searches for the block, then writes the data.
1092             // As the writeback is being dropped, the data is not touched,
1093             // and we just had to wait for the time to find a match in the
1094             // MSHR. As of now assume a mshr queue search takes as long as
1095             // a tag lookup for simplicity.
1096             lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1097
1098             return true;
1099         }
1100
1101         if (!blk) {
1102             // need to do a replacement
1103             blk = allocateBlock(pkt, tag_latency);
1104             if (!blk) {
1105                 // no replaceable block available: give up, fwd to next level.
1106                 incMissCount(pkt);
1107
1108                 // A writeback searches for the block, then writes the data.
1109                 // As the block could not be found, it was a tag-only access.
1110                 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1111
1112                 return false;
1113             }
1114
1115             blk->status |= BlkReadable;
1116         } else if (compressor) {
1117             // This is an overwrite to an existing block, therefore we need
1118             // to check for data expansion (i.e., block was compressed with
1119             // a smaller size, and now it doesn't fit the entry anymore).
1120             // If that is the case we might need to evict blocks.
1121             if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
1122                 pkt->headerDelay, tag_latency)) {
1123                 // This is a failed data expansion (write), which happened
1124                 // after finding the replacement entries and accessing the
1125                 // block's data. There were no replaceable entries available
1126                 // to make room for the expanded block, and since it does not
1127                 // fit anymore and it has been properly updated to contain
1128                 // the new data, forward it to the next level
1129                 lat = calculateAccessLatency(blk, pkt->headerDelay,
1130                                              tag_latency);
1131                 invalidateBlock(blk);
1132                 return false;
1133             }
1134         }
1135
1136         // only mark the block dirty if we got a writeback command,
1137         // and leave it as is for a clean writeback
1138         if (pkt->cmd == MemCmd::WritebackDirty) {
1139             // TODO: the coherent cache can assert(!blk->isDirty());
1140             blk->status |= BlkDirty;
1141         }
1142         // if the packet does not have sharers, it is passing
1143         // writable, and we got the writeback in Modified or Exclusive
1144         // state, if not we are in the Owned or Shared state
1145         if (!pkt->hasSharers()) {
1146             blk->status |= BlkWritable;
1147         }
1148         // nothing else to do; writeback doesn't expect response
1149         assert(!pkt->needsResponse());
1150         pkt->writeDataToBlock(blk->data, blkSize);
1151         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1152         incHitCount(pkt);
1153
1154         // A writeback searches for the block, then writes the data
1155         lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1156
1157         // When the packet metadata arrives, the tag lookup will be done while
1158         // the payload is arriving. Then the block will be ready to access as
1159         // soon as the fill is done
1160         blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1161             std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1162
1163         return true;
1164     } else if (pkt->cmd == MemCmd::CleanEvict) {
1165         // A CleanEvict does not need to access the data array
1166         lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1167
1168         if (blk) {
1169             // Found the block in the tags, need to stop CleanEvict from
1170             // propagating further down the hierarchy. Returning true will
1171             // treat the CleanEvict like a satisfied write request and delete
1172             // it.
1173             return true;
1174         }
1175         // We didn't find the block here, propagate the CleanEvict further
1176         // down the memory hierarchy. Returning false will treat the CleanEvict
1177         // like a Writeback which could not find a replaceable block so has to
1178         // go to next level.
1179         return false;
1180     } else if (pkt->cmd == MemCmd::WriteClean) {
1181         // WriteClean handling is a special case. We can allocate a
1182         // block directly if it doesn't exist and we can update the
1183         // block immediately. The WriteClean transfers the ownership
1184         // of the block as well.
1185         assert(blkSize == pkt->getSize());
1186
1187         if (!blk) {
1188             if (pkt->writeThrough()) {
1189                 // A writeback searches for the block, then writes the data.
1190                 // As the block could not be found, it was a tag-only access.
1191                 lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1192
1193                 // if this is a write through packet, we don't try to
1194                 // allocate if the block is not present
1195                 return false;
1196             } else {
1197                 // a writeback that misses needs to allocate a new block
1198                 blk = allocateBlock(pkt, tag_latency);
1199                 if (!blk) {
1200                     // no replaceable block available: give up, fwd to
1201                     // next level.
1202                     incMissCount(pkt);
1203
1204                     // A writeback searches for the block, then writes the
1205                     // data. As the block could not be found, it was a tag-only
1206                     // access.
1207                     lat = calculateTagOnlyLatency(pkt->headerDelay,
1208                                                   tag_latency);
1209
1210                     return false;
1211                 }
1212
1213                 blk->status |= BlkReadable;
1214             }
1215         } else if (compressor) {
1216             // This is an overwrite to an existing block, therefore we need
1217             // to check for data expansion (i.e., block was compressed with
1218             // a smaller size, and now it doesn't fit the entry anymore).
1219             // If that is the case we might need to evict blocks.
1220             if (!updateCompressionData(blk, pkt->getConstPtr<uint64_t>(),
1221                 pkt->headerDelay, tag_latency)) {
1222                 // This is a failed data expansion (write), which happened
1223                 // after finding the replacement entries and accessing the
1224                 // block's data. There were no replaceable entries available
1225                 // to make room for the expanded block, and since it does not
1226                 // fit anymore and it has been properly updated to contain
1227                 // the new data, forward it to the next level
1228                 lat = calculateAccessLatency(blk, pkt->headerDelay,
1229                                              tag_latency);
1230                 invalidateBlock(blk);
1231                 return false;
1232             }
1233         }
1234
1235         // at this point either this is a writeback or a write-through
1236         // write clean operation and the block is already in this
1237         // cache, we need to update the data and the block flags
1238         assert(blk);
1239         // TODO: the coherent cache can assert(!blk->isDirty());
1240         if (!pkt->writeThrough()) {
1241             blk->status |= BlkDirty;
1242         }
1243         // nothing else to do; writeback doesn't expect response
1244         assert(!pkt->needsResponse());
1245         pkt->writeDataToBlock(blk->data, blkSize);
1246         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
1247
1248         incHitCount(pkt);
1249
1250         // A writeback searches for the block, then writes the data
1251         lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1252
1253         // When the packet metadata arrives, the tag lookup will be done while
1254         // the payload is arriving. Then the block will be ready to access as
1255         // soon as the fill is done
1256         blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1257             std::max(cyclesToTicks(tag_latency), (uint64_t)pkt->payloadDelay));
1258
1259         // If this a write-through packet it will be sent to cache below
1260         return !pkt->writeThrough();
1261     } else if (blk && (pkt->needsWritable() ? blk->isWritable() :
1262                        blk->isReadable())) {
1263         // OK to satisfy access
1264         incHitCount(pkt);
1265
1266         // Calculate access latency based on the need to access the data array
1267         if (pkt->isRead() || pkt->isWrite()) {
1268             lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1269
1270             // When a block is compressed, it must first be decompressed
1271             // before being read. This adds to the access latency.
1272             if (compressor && pkt->isRead()) {
1273                 lat += compressor->getDecompressionLatency(blk);
1274             }
1275         } else {
1276             lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency);
1277         }
1278
1279         satisfyRequest(pkt, blk);
1280         maintainClusivity(pkt->fromCache(), blk);
1281
1282         return true;
1283     }
1284
1285     // Can't satisfy access normally... either no block (blk == nullptr)
1286     // or have block but need writable
1287
1288     incMissCount(pkt);
1289
1290     lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1291
1292     if (!blk && pkt->isLLSC() && pkt->isWrite()) {
1293         // complete miss on store conditional... just give up now
1294         pkt->req->setExtraData(0);
1295         return true;
1296     }
1297
1298     return false;
1299 }
1300
1301 void
1302 BaseCache::maintainClusivity(bool from_cache, CacheBlk *blk)
1303 {
1304     if (from_cache && blk && blk->isValid() && !blk->isDirty() &&
1305         clusivity == Enums::mostly_excl) {
1306         // if we have responded to a cache, and our block is still
1307         // valid, but not dirty, and this cache is mostly exclusive
1308         // with respect to the cache above, drop the block
1309         invalidateBlock(blk);
1310     }
1311 }
1312
1313 CacheBlk*
1314 BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, bool allocate)
1315 {
1316     assert(pkt->isResponse());
1317     Addr addr = pkt->getAddr();
1318     bool is_secure = pkt->isSecure();
1319 #if TRACING_ON
1320     CacheBlk::State old_state = blk ? blk->status : 0;
1321 #endif
1322
1323     // When handling a fill, we should have no writes to this line.
1324     assert(addr == pkt->getBlockAddr(blkSize));
1325     assert(!writeBuffer.findMatch(addr, is_secure));
1326
1327     if (!blk) {
1328         // better have read new data...
1329         assert(pkt->hasData() || pkt->cmd == MemCmd::InvalidateResp);
1330
1331         // Need to do a replacement if allocating, otherwise we stick
1332         // with the temporary storage. The tag lookup has already been
1333         // done to decide the eviction victims, so it is set to 0 here.
1334         // The eviction itself, however, is delayed until the new data
1335         // for the block that is requesting the replacement arrives.
1336         blk = allocate ? allocateBlock(pkt, Cycles(0)) : nullptr;
1337
1338         if (!blk) {
1339             // No replaceable block or a mostly exclusive
1340             // cache... just use temporary storage to complete the
1341             // current request and then get rid of it
1342             blk = tempBlock;
1343             tempBlock->insert(addr, is_secure);
1344             DPRINTF(Cache, "using temp block for %#llx (%s)\n", addr,
1345                     is_secure ? "s" : "ns");
1346         }
1347     } else {
1348         // existing block... probably an upgrade
1349         // don't clear block status... if block is already dirty we
1350         // don't want to lose that
1351     }
1352
1353     // Block is guaranteed to be valid at this point
1354     assert(blk->isValid());
1355     assert(blk->isSecure() == is_secure);
1356     assert(regenerateBlkAddr(blk) == addr);
1357
1358     blk->status |= BlkReadable;
1359
1360     // sanity check for whole-line writes, which should always be
1361     // marked as writable as part of the fill, and then later marked
1362     // dirty as part of satisfyRequest
1363     if (pkt->cmd == MemCmd::InvalidateResp) {
1364         assert(!pkt->hasSharers());
1365     }
1366
1367     // here we deal with setting the appropriate state of the line,
1368     // and we start by looking at the hasSharers flag, and ignore the
1369     // cacheResponding flag (normally signalling dirty data) if the
1370     // packet has sharers, thus the line is never allocated as Owned
1371     // (dirty but not writable), and always ends up being either
1372     // Shared, Exclusive or Modified, see Packet::setCacheResponding
1373     // for more details
1374     if (!pkt->hasSharers()) {
1375         // we could get a writable line from memory (rather than a
1376         // cache) even in a read-only cache, note that we set this bit
1377         // even for a read-only cache, possibly revisit this decision
1378         blk->status |= BlkWritable;
1379
1380         // check if we got this via cache-to-cache transfer (i.e., from a
1381         // cache that had the block in Modified or Owned state)
1382         if (pkt->cacheResponding()) {
1383             // we got the block in Modified state, and invalidated the
1384             // owners copy
1385             blk->status |= BlkDirty;
1386
1387             chatty_assert(!isReadOnly, "Should never see dirty snoop response "
1388                           "in read-only cache %s\n", name());
1389
1390         } else if (pkt->cmd.isSWPrefetch() && pkt->needsWritable()) {
1391             // All other copies of the block were invalidated and we
1392             // have an exclusive copy.
1393
1394             // The coherence protocol assumes that if we fetched an
1395             // exclusive copy of the block, we have the intention to
1396             // modify it. Therefore the MSHR for the PrefetchExReq has
1397             // been the point of ordering and this cache has commited
1398             // to respond to snoops for the block.
1399             //
1400             // In most cases this is true anyway - a PrefetchExReq
1401             // will be followed by a WriteReq. However, if that
1402             // doesn't happen, the block is not marked as dirty and
1403             // the cache doesn't respond to snoops that has committed
1404             // to do so.
1405             //
1406             // To avoid deadlocks in cases where there is a snoop
1407             // between the PrefetchExReq and the expected WriteReq, we
1408             // proactively mark the block as Dirty.
1409
1410             blk->status |= BlkDirty;
1411
1412             panic_if(!isReadOnly, "Prefetch exclusive requests from read-only "
1413                      "cache %s\n", name());
1414         }
1415     }
1416
1417     DPRINTF(Cache, "Block addr %#llx (%s) moving from state %x to %s\n",
1418             addr, is_secure ? "s" : "ns", old_state, blk->print());
1419
1420     // if we got new data, copy it in (checking for a read response
1421     // and a response that has data is the same in the end)
1422     if (pkt->isRead()) {
1423         // sanity checks
1424         assert(pkt->hasData());
1425         assert(pkt->getSize() == blkSize);
1426
1427         pkt->writeDataToBlock(blk->data, blkSize);
1428     }
1429     // The block will be ready when the payload arrives and the fill is done
1430     blk->setWhenReady(clockEdge(fillLatency) + pkt->headerDelay +
1431                       pkt->payloadDelay);
1432
1433     return blk;
1434 }
1435
1436 CacheBlk*
1437 BaseCache::allocateBlock(const PacketPtr pkt, Cycles tag_latency)
1438 {
1439     // Get address
1440     const Addr addr = pkt->getAddr();
1441
1442     // Get secure bit
1443     const bool is_secure = pkt->isSecure();
1444
1445     // Block size and compression related access latency. Only relevant if
1446     // using a compressor, otherwise there is no extra delay, and the block
1447     // is fully sized
1448     std::size_t blk_size_bits = blkSize*8;
1449     Cycles compression_lat = Cycles(0);
1450     Cycles decompression_lat = Cycles(0);
1451
1452     // If a compressor is being used, it is called to compress data before
1453     // insertion. Although in Gem5 the data is stored uncompressed, even if a
1454     // compressor is used, the compression/decompression methods are called to
1455     // calculate the amount of extra cycles needed to read or write compressed
1456     // blocks.
1457     if (compressor) {
1458         compressor->compress(pkt->getConstPtr<uint64_t>(), compression_lat,
1459                              decompression_lat, blk_size_bits);
1460     }
1461
1462     // Find replacement victim
1463     std::vector<CacheBlk*> evict_blks;
1464     CacheBlk *victim = tags->findVictim(addr, is_secure, blk_size_bits,
1465                                         evict_blks);
1466
1467     // It is valid to return nullptr if there is no victim
1468     if (!victim)
1469         return nullptr;
1470
1471     // Print victim block's information
1472     DPRINTF(CacheRepl, "Replacement victim: %s\n", victim->print());
1473
1474     // Check for transient state allocations. If any of the entries listed
1475     // for eviction has a transient state, the allocation fails
1476     bool replacement = false;
1477     for (const auto& blk : evict_blks) {
1478         if (blk->isValid()) {
1479             replacement = true;
1480
1481             Addr repl_addr = regenerateBlkAddr(blk);
1482             MSHR *repl_mshr = mshrQueue.findMatch(repl_addr, blk->isSecure());
1483             if (repl_mshr) {
1484                 // must be an outstanding upgrade or clean request
1485                 // on a block we're about to replace...
1486                 assert((!blk->isWritable() && repl_mshr->needsWritable()) ||
1487                        repl_mshr->isCleaning());
1488
1489                 // too hard to replace block with transient state
1490                 // allocation failed, block not inserted
1491                 return nullptr;
1492             }
1493         }
1494     }
1495
1496     // The victim will be replaced by a new entry, so increase the replacement
1497     // counter if a valid block is being replaced
1498     if (replacement) {
1499         // Evict valid blocks associated to this victim block
1500         for (const auto& blk : evict_blks) {
1501             if (blk->isValid()) {
1502                 DPRINTF(CacheRepl, "Evicting %s (%#llx) to make room for " \
1503                         "%#llx (%s)\n", blk->print(), regenerateBlkAddr(blk),
1504                         addr, is_secure);
1505
1506                 if (blk->wasPrefetched()) {
1507                     unusedPrefetches++;
1508                 }
1509
1510                 Cycles lat =
1511                     calculateAccessLatency(blk, pkt->headerDelay, tag_latency);
1512                 evictBlock(blk, clockEdge(lat + forwardLatency));
1513             }
1514         }
1515
1516         replacements++;
1517     }
1518
1519     // If using a compressor, set compression data. This must be done before
1520     // block insertion, as compressed tags use this information.
1521     if (compressor) {
1522         compressor->setSizeBits(victim, blk_size_bits);
1523         compressor->setDecompressionLatency(victim, decompression_lat);
1524     }
1525
1526     // Insert new block at victimized entry
1527     tags->insertBlock(pkt, victim);
1528
1529     return victim;
1530 }
1531
1532 void
1533 BaseCache::invalidateBlock(CacheBlk *blk)
1534 {
1535     // If handling a block present in the Tags, let it do its invalidation
1536     // process, which will update stats and invalidate the block itself
1537     if (blk != tempBlock) {
1538         tags->invalidate(blk);
1539     } else {
1540         tempBlock->invalidate();
1541     }
1542 }
1543
1544 void
1545 BaseCache::evictBlock(CacheBlk *blk, Tick forward_timing)
1546 {
1547     PacketPtr pkt = evictBlock(blk);
1548     if (pkt) {
1549         if (system->isTimingMode()) {
1550             doWritebacks(pkt, forward_timing);
1551         } else {
1552             doWritebacksAtomic(pkt);
1553         }
1554     }
1555 }
1556
1557 PacketPtr
1558 BaseCache::writebackBlk(CacheBlk *blk)
1559 {
1560     chatty_assert(!isReadOnly || writebackClean,
1561                   "Writeback from read-only cache");
1562     assert(blk && blk->isValid() && (blk->isDirty() || writebackClean));
1563
1564     writebacks[Request::wbMasterId]++;
1565
1566     RequestPtr req = std::make_shared<Request>(
1567         regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
1568
1569     if (blk->isSecure())
1570         req->setFlags(Request::SECURE);
1571
1572     req->taskId(blk->task_id);
1573
1574     PacketPtr pkt =
1575         new Packet(req, blk->isDirty() ?
1576                    MemCmd::WritebackDirty : MemCmd::WritebackClean);
1577
1578     DPRINTF(Cache, "Create Writeback %s writable: %d, dirty: %d\n",
1579             pkt->print(), blk->isWritable(), blk->isDirty());
1580
1581     if (blk->isWritable()) {
1582         // not asserting shared means we pass the block in modified
1583         // state, mark our own block non-writeable
1584         blk->status &= ~BlkWritable;
1585     } else {
1586         // we are in the Owned state, tell the receiver
1587         pkt->setHasSharers();
1588     }
1589
1590     // make sure the block is not marked dirty
1591     blk->status &= ~BlkDirty;
1592
1593     pkt->allocate();
1594     pkt->setDataFromBlock(blk->data, blkSize);
1595
1596     // When a block is compressed, it must first be decompressed before being
1597     // sent for writeback.
1598     if (compressor) {
1599         pkt->payloadDelay = compressor->getDecompressionLatency(blk);
1600     }
1601
1602     return pkt;
1603 }
1604
1605 PacketPtr
1606 BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id)
1607 {
1608     RequestPtr req = std::make_shared<Request>(
1609         regenerateBlkAddr(blk), blkSize, 0, Request::wbMasterId);
1610
1611     if (blk->isSecure()) {
1612         req->setFlags(Request::SECURE);
1613     }
1614     req->taskId(blk->task_id);
1615
1616     PacketPtr pkt = new Packet(req, MemCmd::WriteClean, blkSize, id);
1617
1618     if (dest) {
1619         req->setFlags(dest);
1620         pkt->setWriteThrough();
1621     }
1622
1623     DPRINTF(Cache, "Create %s writable: %d, dirty: %d\n", pkt->print(),
1624             blk->isWritable(), blk->isDirty());
1625
1626     if (blk->isWritable()) {
1627         // not asserting shared means we pass the block in modified
1628         // state, mark our own block non-writeable
1629         blk->status &= ~BlkWritable;
1630     } else {
1631         // we are in the Owned state, tell the receiver
1632         pkt->setHasSharers();
1633     }
1634
1635     // make sure the block is not marked dirty
1636     blk->status &= ~BlkDirty;
1637
1638     pkt->allocate();
1639     pkt->setDataFromBlock(blk->data, blkSize);
1640
1641     // When a block is compressed, it must first be decompressed before being
1642     // sent for writeback.
1643     if (compressor) {
1644         pkt->payloadDelay = compressor->getDecompressionLatency(blk);
1645     }
1646
1647     return pkt;
1648 }
1649
1650
1651 void
1652 BaseCache::memWriteback()
1653 {
1654     tags->forEachBlk([this](CacheBlk &blk) { writebackVisitor(blk); });
1655 }
1656
1657 void
1658 BaseCache::memInvalidate()
1659 {
1660     tags->forEachBlk([this](CacheBlk &blk) { invalidateVisitor(blk); });
1661 }
1662
1663 bool
1664 BaseCache::isDirty() const
1665 {
1666     return tags->anyBlk([](CacheBlk &blk) { return blk.isDirty(); });
1667 }
1668
1669 bool
1670 BaseCache::coalesce() const
1671 {
1672     return writeAllocator && writeAllocator->coalesce();
1673 }
1674
1675 void
1676 BaseCache::writebackVisitor(CacheBlk &blk)
1677 {
1678     if (blk.isDirty()) {
1679         assert(blk.isValid());
1680
1681         RequestPtr request = std::make_shared<Request>(
1682             regenerateBlkAddr(&blk), blkSize, 0, Request::funcMasterId);
1683
1684         request->taskId(blk.task_id);
1685         if (blk.isSecure()) {
1686             request->setFlags(Request::SECURE);
1687         }
1688
1689         Packet packet(request, MemCmd::WriteReq);
1690         packet.dataStatic(blk.data);
1691
1692         memSidePort.sendFunctional(&packet);
1693
1694         blk.status &= ~BlkDirty;
1695     }
1696 }
1697
1698 void
1699 BaseCache::invalidateVisitor(CacheBlk &blk)
1700 {
1701     if (blk.isDirty())
1702         warn_once("Invalidating dirty cache lines. " \
1703                   "Expect things to break.\n");
1704
1705     if (blk.isValid()) {
1706         assert(!blk.isDirty());
1707         invalidateBlock(&blk);
1708     }
1709 }
1710
1711 Tick
1712 BaseCache::nextQueueReadyTime() const
1713 {
1714     Tick nextReady = std::min(mshrQueue.nextReadyTime(),
1715                               writeBuffer.nextReadyTime());
1716
1717     // Don't signal prefetch ready time if no MSHRs available
1718     // Will signal once enoguh MSHRs are deallocated
1719     if (prefetcher && mshrQueue.canPrefetch()) {
1720         nextReady = std::min(nextReady,
1721                              prefetcher->nextPrefetchReadyTime());
1722     }
1723
1724     return nextReady;
1725 }
1726
1727
1728 bool
1729 BaseCache::sendMSHRQueuePacket(MSHR* mshr)
1730 {
1731     assert(mshr);
1732
1733     // use request from 1st target
1734     PacketPtr tgt_pkt = mshr->getTarget()->pkt;
1735
1736     DPRINTF(Cache, "%s: MSHR %s\n", __func__, tgt_pkt->print());
1737
1738     // if the cache is in write coalescing mode or (additionally) in
1739     // no allocation mode, and we have a write packet with an MSHR
1740     // that is not a whole-line write (due to incompatible flags etc),
1741     // then reset the write mode
1742     if (writeAllocator && writeAllocator->coalesce() && tgt_pkt->isWrite()) {
1743         if (!mshr->isWholeLineWrite()) {
1744             // if we are currently write coalescing, hold on the
1745             // MSHR as many cycles extra as we need to completely
1746             // write a cache line
1747             if (writeAllocator->delay(mshr->blkAddr)) {
1748                 Tick delay = blkSize / tgt_pkt->getSize() * clockPeriod();
1749                 DPRINTF(CacheVerbose, "Delaying pkt %s %llu ticks to allow "
1750                         "for write coalescing\n", tgt_pkt->print(), delay);
1751                 mshrQueue.delay(mshr, delay);
1752                 return false;
1753             } else {
1754                 writeAllocator->reset();
1755             }
1756         } else {
1757             writeAllocator->resetDelay(mshr->blkAddr);
1758         }
1759     }
1760
1761     CacheBlk *blk = tags->findBlock(mshr->blkAddr, mshr->isSecure);
1762
1763     // either a prefetch that is not present upstream, or a normal
1764     // MSHR request, proceed to get the packet to send downstream
1765     PacketPtr pkt = createMissPacket(tgt_pkt, blk, mshr->needsWritable(),
1766                                      mshr->isWholeLineWrite());
1767
1768     mshr->isForward = (pkt == nullptr);
1769
1770     if (mshr->isForward) {
1771         // not a cache block request, but a response is expected
1772         // make copy of current packet to forward, keep current
1773         // copy for response handling
1774         pkt = new Packet(tgt_pkt, false, true);
1775         assert(!pkt->isWrite());
1776     }
1777
1778     // play it safe and append (rather than set) the sender state,
1779     // as forwarded packets may already have existing state
1780     pkt->pushSenderState(mshr);
1781
1782     if (pkt->isClean() && blk && blk->isDirty()) {
1783         // A cache clean opearation is looking for a dirty block. Mark
1784         // the packet so that the destination xbar can determine that
1785         // there will be a follow-up write packet as well.
1786         pkt->setSatisfied();
1787     }
1788
1789     if (!memSidePort.sendTimingReq(pkt)) {
1790         // we are awaiting a retry, but we
1791         // delete the packet and will be creating a new packet
1792         // when we get the opportunity
1793         delete pkt;
1794
1795         // note that we have now masked any requestBus and
1796         // schedSendEvent (we will wait for a retry before
1797         // doing anything), and this is so even if we do not
1798         // care about this packet and might override it before
1799         // it gets retried
1800         return true;
1801     } else {
1802         // As part of the call to sendTimingReq the packet is
1803         // forwarded to all neighbouring caches (and any caches
1804         // above them) as a snoop. Thus at this point we know if
1805         // any of the neighbouring caches are responding, and if
1806         // so, we know it is dirty, and we can determine if it is
1807         // being passed as Modified, making our MSHR the ordering
1808         // point
1809         bool pending_modified_resp = !pkt->hasSharers() &&
1810             pkt->cacheResponding();
1811         markInService(mshr, pending_modified_resp);
1812
1813         if (pkt->isClean() && blk && blk->isDirty()) {
1814             // A cache clean opearation is looking for a dirty
1815             // block. If a dirty block is encountered a WriteClean
1816             // will update any copies to the path to the memory
1817             // until the point of reference.
1818             DPRINTF(CacheVerbose, "%s: packet %s found block: %s\n",
1819                     __func__, pkt->print(), blk->print());
1820             PacketPtr wb_pkt = writecleanBlk(blk, pkt->req->getDest(),
1821                                              pkt->id);
1822             doWritebacks(wb_pkt, 0);
1823         }
1824
1825         return false;
1826     }
1827 }
1828
1829 bool
1830 BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry)
1831 {
1832     assert(wq_entry);
1833
1834     // always a single target for write queue entries
1835     PacketPtr tgt_pkt = wq_entry->getTarget()->pkt;
1836
1837     DPRINTF(Cache, "%s: write %s\n", __func__, tgt_pkt->print());
1838
1839     // forward as is, both for evictions and uncacheable writes
1840     if (!memSidePort.sendTimingReq(tgt_pkt)) {
1841         // note that we have now masked any requestBus and
1842         // schedSendEvent (we will wait for a retry before
1843         // doing anything), and this is so even if we do not
1844         // care about this packet and might override it before
1845         // it gets retried
1846         return true;
1847     } else {
1848         markInService(wq_entry);
1849         return false;
1850     }
1851 }
1852
1853 void
1854 BaseCache::serialize(CheckpointOut &cp) const
1855 {
1856     bool dirty(isDirty());
1857
1858     if (dirty) {
1859         warn("*** The cache still contains dirty data. ***\n");
1860         warn("    Make sure to drain the system using the correct flags.\n");
1861         warn("    This checkpoint will not restore correctly " \
1862              "and dirty data in the cache will be lost!\n");
1863     }
1864
1865     // Since we don't checkpoint the data in the cache, any dirty data
1866     // will be lost when restoring from a checkpoint of a system that
1867     // wasn't drained properly. Flag the checkpoint as invalid if the
1868     // cache contains dirty data.
1869     bool bad_checkpoint(dirty);
1870     SERIALIZE_SCALAR(bad_checkpoint);
1871 }
1872
1873 void
1874 BaseCache::unserialize(CheckpointIn &cp)
1875 {
1876     bool bad_checkpoint;
1877     UNSERIALIZE_SCALAR(bad_checkpoint);
1878     if (bad_checkpoint) {
1879         fatal("Restoring from checkpoints with dirty caches is not "
1880               "supported in the classic memory system. Please remove any "
1881               "caches or drain them properly before taking checkpoints.\n");
1882     }
1883 }
1884
1885 void
1886 BaseCache::regStats()
1887 {
1888     ClockedObject::regStats();
1889
1890     using namespace Stats;
1891
1892     // Hit statistics
1893     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1894         MemCmd cmd(access_idx);
1895         const string &cstr = cmd.toString();
1896
1897         hits[access_idx]
1898             .init(system->maxMasters())
1899             .name(name() + "." + cstr + "_hits")
1900             .desc("number of " + cstr + " hits")
1901             .flags(total | nozero | nonan)
1902             ;
1903         for (int i = 0; i < system->maxMasters(); i++) {
1904             hits[access_idx].subname(i, system->getMasterName(i));
1905         }
1906     }
1907
1908 // These macros make it easier to sum the right subset of commands and
1909 // to change the subset of commands that are considered "demand" vs
1910 // "non-demand"
1911 #define SUM_DEMAND(s) \
1912     (s[MemCmd::ReadReq] + s[MemCmd::WriteReq] + s[MemCmd::WriteLineReq] + \
1913      s[MemCmd::ReadExReq] + s[MemCmd::ReadCleanReq] + s[MemCmd::ReadSharedReq])
1914
1915 // should writebacks be included here?  prior code was inconsistent...
1916 #define SUM_NON_DEMAND(s) \
1917     (s[MemCmd::SoftPFReq] + s[MemCmd::HardPFReq] + s[MemCmd::SoftPFExReq])
1918
1919     demandHits
1920         .name(name() + ".demand_hits")
1921         .desc("number of demand (read+write) hits")
1922         .flags(total | nozero | nonan)
1923         ;
1924     demandHits = SUM_DEMAND(hits);
1925     for (int i = 0; i < system->maxMasters(); i++) {
1926         demandHits.subname(i, system->getMasterName(i));
1927     }
1928
1929     overallHits
1930         .name(name() + ".overall_hits")
1931         .desc("number of overall hits")
1932         .flags(total | nozero | nonan)
1933         ;
1934     overallHits = demandHits + SUM_NON_DEMAND(hits);
1935     for (int i = 0; i < system->maxMasters(); i++) {
1936         overallHits.subname(i, system->getMasterName(i));
1937     }
1938
1939     // Miss statistics
1940     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1941         MemCmd cmd(access_idx);
1942         const string &cstr = cmd.toString();
1943
1944         misses[access_idx]
1945             .init(system->maxMasters())
1946             .name(name() + "." + cstr + "_misses")
1947             .desc("number of " + cstr + " misses")
1948             .flags(total | nozero | nonan)
1949             ;
1950         for (int i = 0; i < system->maxMasters(); i++) {
1951             misses[access_idx].subname(i, system->getMasterName(i));
1952         }
1953     }
1954
1955     demandMisses
1956         .name(name() + ".demand_misses")
1957         .desc("number of demand (read+write) misses")
1958         .flags(total | nozero | nonan)
1959         ;
1960     demandMisses = SUM_DEMAND(misses);
1961     for (int i = 0; i < system->maxMasters(); i++) {
1962         demandMisses.subname(i, system->getMasterName(i));
1963     }
1964
1965     overallMisses
1966         .name(name() + ".overall_misses")
1967         .desc("number of overall misses")
1968         .flags(total | nozero | nonan)
1969         ;
1970     overallMisses = demandMisses + SUM_NON_DEMAND(misses);
1971     for (int i = 0; i < system->maxMasters(); i++) {
1972         overallMisses.subname(i, system->getMasterName(i));
1973     }
1974
1975     // Miss latency statistics
1976     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
1977         MemCmd cmd(access_idx);
1978         const string &cstr = cmd.toString();
1979
1980         missLatency[access_idx]
1981             .init(system->maxMasters())
1982             .name(name() + "." + cstr + "_miss_latency")
1983             .desc("number of " + cstr + " miss cycles")
1984             .flags(total | nozero | nonan)
1985             ;
1986         for (int i = 0; i < system->maxMasters(); i++) {
1987             missLatency[access_idx].subname(i, system->getMasterName(i));
1988         }
1989     }
1990
1991     demandMissLatency
1992         .name(name() + ".demand_miss_latency")
1993         .desc("number of demand (read+write) miss cycles")
1994         .flags(total | nozero | nonan)
1995         ;
1996     demandMissLatency = SUM_DEMAND(missLatency);
1997     for (int i = 0; i < system->maxMasters(); i++) {
1998         demandMissLatency.subname(i, system->getMasterName(i));
1999     }
2000
2001     overallMissLatency
2002         .name(name() + ".overall_miss_latency")
2003         .desc("number of overall miss cycles")
2004         .flags(total | nozero | nonan)
2005         ;
2006     overallMissLatency = demandMissLatency + SUM_NON_DEMAND(missLatency);
2007     for (int i = 0; i < system->maxMasters(); i++) {
2008         overallMissLatency.subname(i, system->getMasterName(i));
2009     }
2010
2011     // access formulas
2012     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2013         MemCmd cmd(access_idx);
2014         const string &cstr = cmd.toString();
2015
2016         accesses[access_idx]
2017             .name(name() + "." + cstr + "_accesses")
2018             .desc("number of " + cstr + " accesses(hits+misses)")
2019             .flags(total | nozero | nonan)
2020             ;
2021         accesses[access_idx] = hits[access_idx] + misses[access_idx];
2022
2023         for (int i = 0; i < system->maxMasters(); i++) {
2024             accesses[access_idx].subname(i, system->getMasterName(i));
2025         }
2026     }
2027
2028     demandAccesses
2029         .name(name() + ".demand_accesses")
2030         .desc("number of demand (read+write) accesses")
2031         .flags(total | nozero | nonan)
2032         ;
2033     demandAccesses = demandHits + demandMisses;
2034     for (int i = 0; i < system->maxMasters(); i++) {
2035         demandAccesses.subname(i, system->getMasterName(i));
2036     }
2037
2038     overallAccesses
2039         .name(name() + ".overall_accesses")
2040         .desc("number of overall (read+write) accesses")
2041         .flags(total | nozero | nonan)
2042         ;
2043     overallAccesses = overallHits + overallMisses;
2044     for (int i = 0; i < system->maxMasters(); i++) {
2045         overallAccesses.subname(i, system->getMasterName(i));
2046     }
2047
2048     // miss rate formulas
2049     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2050         MemCmd cmd(access_idx);
2051         const string &cstr = cmd.toString();
2052
2053         missRate[access_idx]
2054             .name(name() + "." + cstr + "_miss_rate")
2055             .desc("miss rate for " + cstr + " accesses")
2056             .flags(total | nozero | nonan)
2057             ;
2058         missRate[access_idx] = misses[access_idx] / accesses[access_idx];
2059
2060         for (int i = 0; i < system->maxMasters(); i++) {
2061             missRate[access_idx].subname(i, system->getMasterName(i));
2062         }
2063     }
2064
2065     demandMissRate
2066         .name(name() + ".demand_miss_rate")
2067         .desc("miss rate for demand accesses")
2068         .flags(total | nozero | nonan)
2069         ;
2070     demandMissRate = demandMisses / demandAccesses;
2071     for (int i = 0; i < system->maxMasters(); i++) {
2072         demandMissRate.subname(i, system->getMasterName(i));
2073     }
2074
2075     overallMissRate
2076         .name(name() + ".overall_miss_rate")
2077         .desc("miss rate for overall accesses")
2078         .flags(total | nozero | nonan)
2079         ;
2080     overallMissRate = overallMisses / overallAccesses;
2081     for (int i = 0; i < system->maxMasters(); i++) {
2082         overallMissRate.subname(i, system->getMasterName(i));
2083     }
2084
2085     // miss latency formulas
2086     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2087         MemCmd cmd(access_idx);
2088         const string &cstr = cmd.toString();
2089
2090         avgMissLatency[access_idx]
2091             .name(name() + "." + cstr + "_avg_miss_latency")
2092             .desc("average " + cstr + " miss latency")
2093             .flags(total | nozero | nonan)
2094             ;
2095         avgMissLatency[access_idx] =
2096             missLatency[access_idx] / misses[access_idx];
2097
2098         for (int i = 0; i < system->maxMasters(); i++) {
2099             avgMissLatency[access_idx].subname(i, system->getMasterName(i));
2100         }
2101     }
2102
2103     demandAvgMissLatency
2104         .name(name() + ".demand_avg_miss_latency")
2105         .desc("average overall miss latency")
2106         .flags(total | nozero | nonan)
2107         ;
2108     demandAvgMissLatency = demandMissLatency / demandMisses;
2109     for (int i = 0; i < system->maxMasters(); i++) {
2110         demandAvgMissLatency.subname(i, system->getMasterName(i));
2111     }
2112
2113     overallAvgMissLatency
2114         .name(name() + ".overall_avg_miss_latency")
2115         .desc("average overall miss latency")
2116         .flags(total | nozero | nonan)
2117         ;
2118     overallAvgMissLatency = overallMissLatency / overallMisses;
2119     for (int i = 0; i < system->maxMasters(); i++) {
2120         overallAvgMissLatency.subname(i, system->getMasterName(i));
2121     }
2122
2123     blocked_cycles.init(NUM_BLOCKED_CAUSES);
2124     blocked_cycles
2125         .name(name() + ".blocked_cycles")
2126         .desc("number of cycles access was blocked")
2127         .subname(Blocked_NoMSHRs, "no_mshrs")
2128         .subname(Blocked_NoTargets, "no_targets")
2129         ;
2130
2131
2132     blocked_causes.init(NUM_BLOCKED_CAUSES);
2133     blocked_causes
2134         .name(name() + ".blocked")
2135         .desc("number of cycles access was blocked")
2136         .subname(Blocked_NoMSHRs, "no_mshrs")
2137         .subname(Blocked_NoTargets, "no_targets")
2138         ;
2139
2140     avg_blocked
2141         .name(name() + ".avg_blocked_cycles")
2142         .desc("average number of cycles each access was blocked")
2143         .subname(Blocked_NoMSHRs, "no_mshrs")
2144         .subname(Blocked_NoTargets, "no_targets")
2145         ;
2146
2147     avg_blocked = blocked_cycles / blocked_causes;
2148
2149     unusedPrefetches
2150         .name(name() + ".unused_prefetches")
2151         .desc("number of HardPF blocks evicted w/o reference")
2152         .flags(nozero)
2153         ;
2154
2155     writebacks
2156         .init(system->maxMasters())
2157         .name(name() + ".writebacks")
2158         .desc("number of writebacks")
2159         .flags(total | nozero | nonan)
2160         ;
2161     for (int i = 0; i < system->maxMasters(); i++) {
2162         writebacks.subname(i, system->getMasterName(i));
2163     }
2164
2165     // MSHR statistics
2166     // MSHR hit statistics
2167     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2168         MemCmd cmd(access_idx);
2169         const string &cstr = cmd.toString();
2170
2171         mshr_hits[access_idx]
2172             .init(system->maxMasters())
2173             .name(name() + "." + cstr + "_mshr_hits")
2174             .desc("number of " + cstr + " MSHR hits")
2175             .flags(total | nozero | nonan)
2176             ;
2177         for (int i = 0; i < system->maxMasters(); i++) {
2178             mshr_hits[access_idx].subname(i, system->getMasterName(i));
2179         }
2180     }
2181
2182     demandMshrHits
2183         .name(name() + ".demand_mshr_hits")
2184         .desc("number of demand (read+write) MSHR hits")
2185         .flags(total | nozero | nonan)
2186         ;
2187     demandMshrHits = SUM_DEMAND(mshr_hits);
2188     for (int i = 0; i < system->maxMasters(); i++) {
2189         demandMshrHits.subname(i, system->getMasterName(i));
2190     }
2191
2192     overallMshrHits
2193         .name(name() + ".overall_mshr_hits")
2194         .desc("number of overall MSHR hits")
2195         .flags(total | nozero | nonan)
2196         ;
2197     overallMshrHits = demandMshrHits + SUM_NON_DEMAND(mshr_hits);
2198     for (int i = 0; i < system->maxMasters(); i++) {
2199         overallMshrHits.subname(i, system->getMasterName(i));
2200     }
2201
2202     // MSHR miss statistics
2203     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2204         MemCmd cmd(access_idx);
2205         const string &cstr = cmd.toString();
2206
2207         mshr_misses[access_idx]
2208             .init(system->maxMasters())
2209             .name(name() + "." + cstr + "_mshr_misses")
2210             .desc("number of " + cstr + " MSHR misses")
2211             .flags(total | nozero | nonan)
2212             ;
2213         for (int i = 0; i < system->maxMasters(); i++) {
2214             mshr_misses[access_idx].subname(i, system->getMasterName(i));
2215         }
2216     }
2217
2218     demandMshrMisses
2219         .name(name() + ".demand_mshr_misses")
2220         .desc("number of demand (read+write) MSHR misses")
2221         .flags(total | nozero | nonan)
2222         ;
2223     demandMshrMisses = SUM_DEMAND(mshr_misses);
2224     for (int i = 0; i < system->maxMasters(); i++) {
2225         demandMshrMisses.subname(i, system->getMasterName(i));
2226     }
2227
2228     overallMshrMisses
2229         .name(name() + ".overall_mshr_misses")
2230         .desc("number of overall MSHR misses")
2231         .flags(total | nozero | nonan)
2232         ;
2233     overallMshrMisses = demandMshrMisses + SUM_NON_DEMAND(mshr_misses);
2234     for (int i = 0; i < system->maxMasters(); i++) {
2235         overallMshrMisses.subname(i, system->getMasterName(i));
2236     }
2237
2238     // MSHR miss latency statistics
2239     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2240         MemCmd cmd(access_idx);
2241         const string &cstr = cmd.toString();
2242
2243         mshr_miss_latency[access_idx]
2244             .init(system->maxMasters())
2245             .name(name() + "." + cstr + "_mshr_miss_latency")
2246             .desc("number of " + cstr + " MSHR miss cycles")
2247             .flags(total | nozero | nonan)
2248             ;
2249         for (int i = 0; i < system->maxMasters(); i++) {
2250             mshr_miss_latency[access_idx].subname(i, system->getMasterName(i));
2251         }
2252     }
2253
2254     demandMshrMissLatency
2255         .name(name() + ".demand_mshr_miss_latency")
2256         .desc("number of demand (read+write) MSHR miss cycles")
2257         .flags(total | nozero | nonan)
2258         ;
2259     demandMshrMissLatency = SUM_DEMAND(mshr_miss_latency);
2260     for (int i = 0; i < system->maxMasters(); i++) {
2261         demandMshrMissLatency.subname(i, system->getMasterName(i));
2262     }
2263
2264     overallMshrMissLatency
2265         .name(name() + ".overall_mshr_miss_latency")
2266         .desc("number of overall MSHR miss cycles")
2267         .flags(total | nozero | nonan)
2268         ;
2269     overallMshrMissLatency =
2270         demandMshrMissLatency + SUM_NON_DEMAND(mshr_miss_latency);
2271     for (int i = 0; i < system->maxMasters(); i++) {
2272         overallMshrMissLatency.subname(i, system->getMasterName(i));
2273     }
2274
2275     // MSHR uncacheable statistics
2276     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2277         MemCmd cmd(access_idx);
2278         const string &cstr = cmd.toString();
2279
2280         mshr_uncacheable[access_idx]
2281             .init(system->maxMasters())
2282             .name(name() + "." + cstr + "_mshr_uncacheable")
2283             .desc("number of " + cstr + " MSHR uncacheable")
2284             .flags(total | nozero | nonan)
2285             ;
2286         for (int i = 0; i < system->maxMasters(); i++) {
2287             mshr_uncacheable[access_idx].subname(i, system->getMasterName(i));
2288         }
2289     }
2290
2291     overallMshrUncacheable
2292         .name(name() + ".overall_mshr_uncacheable_misses")
2293         .desc("number of overall MSHR uncacheable misses")
2294         .flags(total | nozero | nonan)
2295         ;
2296     overallMshrUncacheable =
2297         SUM_DEMAND(mshr_uncacheable) + SUM_NON_DEMAND(mshr_uncacheable);
2298     for (int i = 0; i < system->maxMasters(); i++) {
2299         overallMshrUncacheable.subname(i, system->getMasterName(i));
2300     }
2301
2302     // MSHR miss latency statistics
2303     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2304         MemCmd cmd(access_idx);
2305         const string &cstr = cmd.toString();
2306
2307         mshr_uncacheable_lat[access_idx]
2308             .init(system->maxMasters())
2309             .name(name() + "." + cstr + "_mshr_uncacheable_latency")
2310             .desc("number of " + cstr + " MSHR uncacheable cycles")
2311             .flags(total | nozero | nonan)
2312             ;
2313         for (int i = 0; i < system->maxMasters(); i++) {
2314             mshr_uncacheable_lat[access_idx].subname(
2315                 i, system->getMasterName(i));
2316         }
2317     }
2318
2319     overallMshrUncacheableLatency
2320         .name(name() + ".overall_mshr_uncacheable_latency")
2321         .desc("number of overall MSHR uncacheable cycles")
2322         .flags(total | nozero | nonan)
2323         ;
2324     overallMshrUncacheableLatency =
2325         SUM_DEMAND(mshr_uncacheable_lat) +
2326         SUM_NON_DEMAND(mshr_uncacheable_lat);
2327     for (int i = 0; i < system->maxMasters(); i++) {
2328         overallMshrUncacheableLatency.subname(i, system->getMasterName(i));
2329     }
2330
2331 #if 0
2332     // MSHR access formulas
2333     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2334         MemCmd cmd(access_idx);
2335         const string &cstr = cmd.toString();
2336
2337         mshrAccesses[access_idx]
2338             .name(name() + "." + cstr + "_mshr_accesses")
2339             .desc("number of " + cstr + " mshr accesses(hits+misses)")
2340             .flags(total | nozero | nonan)
2341             ;
2342         mshrAccesses[access_idx] =
2343             mshr_hits[access_idx] + mshr_misses[access_idx]
2344             + mshr_uncacheable[access_idx];
2345     }
2346
2347     demandMshrAccesses
2348         .name(name() + ".demand_mshr_accesses")
2349         .desc("number of demand (read+write) mshr accesses")
2350         .flags(total | nozero | nonan)
2351         ;
2352     demandMshrAccesses = demandMshrHits + demandMshrMisses;
2353
2354     overallMshrAccesses
2355         .name(name() + ".overall_mshr_accesses")
2356         .desc("number of overall (read+write) mshr accesses")
2357         .flags(total | nozero | nonan)
2358         ;
2359     overallMshrAccesses = overallMshrHits + overallMshrMisses
2360         + overallMshrUncacheable;
2361 #endif
2362
2363     // MSHR miss rate formulas
2364     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2365         MemCmd cmd(access_idx);
2366         const string &cstr = cmd.toString();
2367
2368         mshrMissRate[access_idx]
2369             .name(name() + "." + cstr + "_mshr_miss_rate")
2370             .desc("mshr miss rate for " + cstr + " accesses")
2371             .flags(total | nozero | nonan)
2372             ;
2373         mshrMissRate[access_idx] =
2374             mshr_misses[access_idx] / accesses[access_idx];
2375
2376         for (int i = 0; i < system->maxMasters(); i++) {
2377             mshrMissRate[access_idx].subname(i, system->getMasterName(i));
2378         }
2379     }
2380
2381     demandMshrMissRate
2382         .name(name() + ".demand_mshr_miss_rate")
2383         .desc("mshr miss rate for demand accesses")
2384         .flags(total | nozero | nonan)
2385         ;
2386     demandMshrMissRate = demandMshrMisses / demandAccesses;
2387     for (int i = 0; i < system->maxMasters(); i++) {
2388         demandMshrMissRate.subname(i, system->getMasterName(i));
2389     }
2390
2391     overallMshrMissRate
2392         .name(name() + ".overall_mshr_miss_rate")
2393         .desc("mshr miss rate for overall accesses")
2394         .flags(total | nozero | nonan)
2395         ;
2396     overallMshrMissRate = overallMshrMisses / overallAccesses;
2397     for (int i = 0; i < system->maxMasters(); i++) {
2398         overallMshrMissRate.subname(i, system->getMasterName(i));
2399     }
2400
2401     // mshrMiss latency formulas
2402     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2403         MemCmd cmd(access_idx);
2404         const string &cstr = cmd.toString();
2405
2406         avgMshrMissLatency[access_idx]
2407             .name(name() + "." + cstr + "_avg_mshr_miss_latency")
2408             .desc("average " + cstr + " mshr miss latency")
2409             .flags(total | nozero | nonan)
2410             ;
2411         avgMshrMissLatency[access_idx] =
2412             mshr_miss_latency[access_idx] / mshr_misses[access_idx];
2413
2414         for (int i = 0; i < system->maxMasters(); i++) {
2415             avgMshrMissLatency[access_idx].subname(
2416                 i, system->getMasterName(i));
2417         }
2418     }
2419
2420     demandAvgMshrMissLatency
2421         .name(name() + ".demand_avg_mshr_miss_latency")
2422         .desc("average overall mshr miss latency")
2423         .flags(total | nozero | nonan)
2424         ;
2425     demandAvgMshrMissLatency = demandMshrMissLatency / demandMshrMisses;
2426     for (int i = 0; i < system->maxMasters(); i++) {
2427         demandAvgMshrMissLatency.subname(i, system->getMasterName(i));
2428     }
2429
2430     overallAvgMshrMissLatency
2431         .name(name() + ".overall_avg_mshr_miss_latency")
2432         .desc("average overall mshr miss latency")
2433         .flags(total | nozero | nonan)
2434         ;
2435     overallAvgMshrMissLatency = overallMshrMissLatency / overallMshrMisses;
2436     for (int i = 0; i < system->maxMasters(); i++) {
2437         overallAvgMshrMissLatency.subname(i, system->getMasterName(i));
2438     }
2439
2440     // mshrUncacheable latency formulas
2441     for (int access_idx = 0; access_idx < MemCmd::NUM_MEM_CMDS; ++access_idx) {
2442         MemCmd cmd(access_idx);
2443         const string &cstr = cmd.toString();
2444
2445         avgMshrUncacheableLatency[access_idx]
2446             .name(name() + "." + cstr + "_avg_mshr_uncacheable_latency")
2447             .desc("average " + cstr + " mshr uncacheable latency")
2448             .flags(total | nozero | nonan)
2449             ;
2450         avgMshrUncacheableLatency[access_idx] =
2451             mshr_uncacheable_lat[access_idx] / mshr_uncacheable[access_idx];
2452
2453         for (int i = 0; i < system->maxMasters(); i++) {
2454             avgMshrUncacheableLatency[access_idx].subname(
2455                 i, system->getMasterName(i));
2456         }
2457     }
2458
2459     overallAvgMshrUncacheableLatency
2460         .name(name() + ".overall_avg_mshr_uncacheable_latency")
2461         .desc("average overall mshr uncacheable latency")
2462         .flags(total | nozero | nonan)
2463         ;
2464     overallAvgMshrUncacheableLatency =
2465         overallMshrUncacheableLatency / overallMshrUncacheable;
2466     for (int i = 0; i < system->maxMasters(); i++) {
2467         overallAvgMshrUncacheableLatency.subname(i, system->getMasterName(i));
2468     }
2469
2470     replacements
2471         .name(name() + ".replacements")
2472         .desc("number of replacements")
2473         ;
2474
2475     dataExpansions
2476         .name(name() + ".data_expansions")
2477         .desc("number of data expansions")
2478         .flags(nozero | nonan)
2479         ;
2480 }
2481
2482 void
2483 BaseCache::regProbePoints()
2484 {
2485     ppHit = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Hit");
2486     ppMiss = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Miss");
2487     ppFill = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Fill");
2488 }
2489
2490 ///////////////
2491 //
2492 // CpuSidePort
2493 //
2494 ///////////////
2495 bool
2496 BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
2497 {
2498     // Snoops shouldn't happen when bypassing caches
2499     assert(!cache->system->bypassCaches());
2500
2501     assert(pkt->isResponse());
2502
2503     // Express snoop responses from master to slave, e.g., from L1 to L2
2504     cache->recvTimingSnoopResp(pkt);
2505     return true;
2506 }
2507
2508
2509 bool
2510 BaseCache::CpuSidePort::tryTiming(PacketPtr pkt)
2511 {
2512     if (cache->system->bypassCaches() || pkt->isExpressSnoop()) {
2513         // always let express snoop packets through even if blocked
2514         return true;
2515     } else if (blocked || mustSendRetry) {
2516         // either already committed to send a retry, or blocked
2517         mustSendRetry = true;
2518         return false;
2519     }
2520     mustSendRetry = false;
2521     return true;
2522 }
2523
2524 bool
2525 BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
2526 {
2527     assert(pkt->isRequest());
2528
2529     if (cache->system->bypassCaches()) {
2530         // Just forward the packet if caches are disabled.
2531         // @todo This should really enqueue the packet rather
2532         bool M5_VAR_USED success = cache->memSidePort.sendTimingReq(pkt);
2533         assert(success);
2534         return true;
2535     } else if (tryTiming(pkt)) {
2536         cache->recvTimingReq(pkt);
2537         return true;
2538     }
2539     return false;
2540 }
2541
2542 Tick
2543 BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt)
2544 {
2545     if (cache->system->bypassCaches()) {
2546         // Forward the request if the system is in cache bypass mode.
2547         return cache->memSidePort.sendAtomic(pkt);
2548     } else {
2549         return cache->recvAtomic(pkt);
2550     }
2551 }
2552
2553 void
2554 BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt)
2555 {
2556     if (cache->system->bypassCaches()) {
2557         // The cache should be flushed if we are in cache bypass mode,
2558         // so we don't need to check if we need to update anything.
2559         cache->memSidePort.sendFunctional(pkt);
2560         return;
2561     }
2562
2563     // functional request
2564     cache->functionalAccess(pkt, true);
2565 }
2566
2567 AddrRangeList
2568 BaseCache::CpuSidePort::getAddrRanges() const
2569 {
2570     return cache->getAddrRanges();
2571 }
2572
2573
2574 BaseCache::
2575 CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache,
2576                          const std::string &_label)
2577     : CacheSlavePort(_name, _cache, _label), cache(_cache)
2578 {
2579 }
2580
2581 ///////////////
2582 //
2583 // MemSidePort
2584 //
2585 ///////////////
2586 bool
2587 BaseCache::MemSidePort::recvTimingResp(PacketPtr pkt)
2588 {
2589     cache->recvTimingResp(pkt);
2590     return true;
2591 }
2592
2593 // Express snooping requests to memside port
2594 void
2595 BaseCache::MemSidePort::recvTimingSnoopReq(PacketPtr pkt)
2596 {
2597     // Snoops shouldn't happen when bypassing caches
2598     assert(!cache->system->bypassCaches());
2599
2600     // handle snooping requests
2601     cache->recvTimingSnoopReq(pkt);
2602 }
2603
2604 Tick
2605 BaseCache::MemSidePort::recvAtomicSnoop(PacketPtr pkt)
2606 {
2607     // Snoops shouldn't happen when bypassing caches
2608     assert(!cache->system->bypassCaches());
2609
2610     return cache->recvAtomicSnoop(pkt);
2611 }
2612
2613 void
2614 BaseCache::MemSidePort::recvFunctionalSnoop(PacketPtr pkt)
2615 {
2616     // Snoops shouldn't happen when bypassing caches
2617     assert(!cache->system->bypassCaches());
2618
2619     // functional snoop (note that in contrast to atomic we don't have
2620     // a specific functionalSnoop method, as they have the same
2621     // behaviour regardless)
2622     cache->functionalAccess(pkt, false);
2623 }
2624
2625 void
2626 BaseCache::CacheReqPacketQueue::sendDeferredPacket()
2627 {
2628     // sanity check
2629     assert(!waitingOnRetry);
2630
2631     // there should never be any deferred request packets in the
2632     // queue, instead we resly on the cache to provide the packets
2633     // from the MSHR queue or write queue
2634     assert(deferredPacketReadyTime() == MaxTick);
2635
2636     // check for request packets (requests & writebacks)
2637     QueueEntry* entry = cache.getNextQueueEntry();
2638
2639     if (!entry) {
2640         // can happen if e.g. we attempt a writeback and fail, but
2641         // before the retry, the writeback is eliminated because
2642         // we snoop another cache's ReadEx.
2643     } else {
2644         // let our snoop responses go first if there are responses to
2645         // the same addresses
2646         if (checkConflictingSnoop(entry->getTarget()->pkt)) {
2647             return;
2648         }
2649         waitingOnRetry = entry->sendPacket(cache);
2650     }
2651
2652     // if we succeeded and are not waiting for a retry, schedule the
2653     // next send considering when the next queue is ready, note that
2654     // snoop responses have their own packet queue and thus schedule
2655     // their own events
2656     if (!waitingOnRetry) {
2657         schedSendEvent(cache.nextQueueReadyTime());
2658     }
2659 }
2660
2661 BaseCache::MemSidePort::MemSidePort(const std::string &_name,
2662                                     BaseCache *_cache,
2663                                     const std::string &_label)
2664     : CacheMasterPort(_name, _cache, _reqQueue, _snoopRespQueue),
2665       _reqQueue(*_cache, *this, _snoopRespQueue, _label),
2666       _snoopRespQueue(*_cache, *this, true, _label), cache(_cache)
2667 {
2668 }
2669
2670 void
2671 WriteAllocator::updateMode(Addr write_addr, unsigned write_size,
2672                            Addr blk_addr)
2673 {
2674     // check if we are continuing where the last write ended
2675     if (nextAddr == write_addr) {
2676         delayCtr[blk_addr] = delayThreshold;
2677         // stop if we have already saturated
2678         if (mode != WriteMode::NO_ALLOCATE) {
2679             byteCount += write_size;
2680             // switch to streaming mode if we have passed the lower
2681             // threshold
2682             if (mode == WriteMode::ALLOCATE &&
2683                 byteCount > coalesceLimit) {
2684                 mode = WriteMode::COALESCE;
2685                 DPRINTF(Cache, "Switched to write coalescing\n");
2686             } else if (mode == WriteMode::COALESCE &&
2687                        byteCount > noAllocateLimit) {
2688                 // and continue and switch to non-allocating mode if we
2689                 // pass the upper threshold
2690                 mode = WriteMode::NO_ALLOCATE;
2691                 DPRINTF(Cache, "Switched to write-no-allocate\n");
2692             }
2693         }
2694     } else {
2695         // we did not see a write matching the previous one, start
2696         // over again
2697         byteCount = write_size;
2698         mode = WriteMode::ALLOCATE;
2699         resetDelay(blk_addr);
2700     }
2701     nextAddr = write_addr + write_size;
2702 }
2703
2704 WriteAllocator*
2705 WriteAllocatorParams::create()
2706 {
2707     return new WriteAllocator(this);
2708 }