src/mem/cache/base.hh

   1 /*
   2  * Copyright (c) 2012-2013, 2015-2016, 2018-2019 ARM Limited
   3  * All rights reserved.
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2003-2005 The Regents of The University of Michigan
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  */
  40
  41 /**
  42  * @file
  43  * Declares a basic cache interface BaseCache.
  44  */
  45
  46 #ifndef __MEM_CACHE_BASE_HH__
  47 #define __MEM_CACHE_BASE_HH__
  48
  49 #include <cassert>
  50 #include <cstdint>
  51 #include <string>
  52
  53 #include "base/addr_range.hh"
  54 #include "base/statistics.hh"
  55 #include "base/trace.hh"
  56 #include "base/types.hh"
  57 #include "debug/Cache.hh"
  58 #include "debug/CachePort.hh"
  59 #include "enums/Clusivity.hh"
  60 #include "mem/cache/cache_blk.hh"
  61 #include "mem/cache/compressors/base.hh"
  62 #include "mem/cache/mshr_queue.hh"
  63 #include "mem/cache/tags/base.hh"
  64 #include "mem/cache/write_queue.hh"
  65 #include "mem/cache/write_queue_entry.hh"
  66 #include "mem/packet.hh"
  67 #include "mem/packet_queue.hh"
  68 #include "mem/qport.hh"
  69 #include "mem/request.hh"
  70 #include "params/WriteAllocator.hh"
  71 #include "sim/clocked_object.hh"
  72 #include "sim/eventq.hh"
  73 #include "sim/probe/probe.hh"
  74 #include "sim/serialize.hh"
  75 #include "sim/sim_exit.hh"
  76 #include "sim/system.hh"
  77
  78 namespace Prefetcher {
  79     class Base;
  80 }
  81 class MSHR;
  82 class RequestPort;
  83 class QueueEntry;
  84 struct BaseCacheParams;
  85
  86 /**
  87  * A basic cache interface. Implements some common functions for speed.
  88  */
  89 class BaseCache : public ClockedObject
  90 {
  91   protected:
  92     /**
  93      * Indexes to enumerate the MSHR queues.
  94      */
   95     enum MSHRQueueIndex {
   96         MSHRQueue_MSHRs,        // first enumerator, implicitly 0
   97         MSHRQueue_WriteBuffer   // implicitly 1
   98     };
  99
 100   public:
 101     /**
 102      * Reasons for caches to be blocked.
 103      */
  104     enum BlockedCause {
  105         Blocked_NoMSHRs = MSHRQueue_MSHRs,  // same value as the MSHR index
  106         Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,  // same as WB index
  107         Blocked_NoTargets,   // next value after Blocked_NoWBBuffers
  108         NUM_BLOCKED_CAUSES   // last enumerator; equals the cause count
  109     };
 110
 111   protected:
 112
 113     /**
 114      * A cache master port is used for the memory-side port of the
 115      * cache, and in addition to the basic timing port that only sends
 116      * response packets through a transmit list, it also offers the
 117      * ability to schedule and send request packets (requests &
 118      * writebacks). The send event is scheduled through schedSendEvent,
 119      * and the sendDeferredPacket of the timing port is modified to
 120      * consider both the transmit list and the requests from the MSHR.
 121      */
 122     class CacheMasterPort : public QueuedMasterPort
 123     {
 124
 125       public:
 126
 127         /**
 128          * Schedule a send of a request packet (from the MSHR). Note
 129          * that we could already have a retry outstanding.
 130          */
 131         void schedSendEvent(Tick time)
 132         {
 133             DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
 134             reqQueue.schedSendEvent(time);
 135         }
 136
 137       protected:
 138
 139         CacheMasterPort(const std::string &_name, BaseCache *_cache,
 140                         ReqPacketQueue &_reqQueue,
 141                         SnoopRespPacketQueue &_snoopRespQueue) :
 142             QueuedMasterPort(_name, _cache, _reqQueue, _snoopRespQueue)
 143         { }
 144
 145         /**
 146          * Memory-side port always snoops.
 147          *
 148          * @return always true
 149          */
 150         virtual bool isSnooping() const { return true; }
 151     };
 152
 153     /**
 154      * Override the default behaviour of sendDeferredPacket to enable
 155      * the memory-side cache port to also send requests based on the
 156      * current MSHR status. This queue has a pointer to our specific
 157      * cache implementation and is used by the MemSidePort.
 158      */
 159     class CacheReqPacketQueue : public ReqPacketQueue
 160     {
 161
 162       protected:
 163
 164         BaseCache &cache;
 165         SnoopRespPacketQueue &snoopRespQueue;
 166
 167       public:
 168
 169         CacheReqPacketQueue(BaseCache &cache, RequestPort &port,
 170                             SnoopRespPacketQueue &snoop_resp_queue,
 171                             const std::string &label) :
 172             ReqPacketQueue(cache, port, label), cache(cache),
 173             snoopRespQueue(snoop_resp_queue) { }
 174
 175         /**
 176          * Override the normal sendDeferredPacket and do not only
 177          * consider the transmit list (used for responses), but also
 178          * requests.
 179          */
 180         virtual void sendDeferredPacket();
 181
 182         /**
 183          * Check if there is a conflicting snoop response about to be
 184          * send out, and if so simply stall any requests, and schedule
 185          * a send event at the same time as the next snoop response is
 186          * being sent out.
 187          *
 188          * @param pkt The packet to check for conflicts against.
 189          */
 190         bool checkConflictingSnoop(const PacketPtr pkt)
 191         {
 192             if (snoopRespQueue.checkConflict(pkt, cache.blkSize)) {
 193                 DPRINTF(CachePort, "Waiting for snoop response to be "
 194                         "sent\n");
 195                 Tick when = snoopRespQueue.deferredPacketReadyTime();
 196                 schedSendEvent(when);
 197                 return true;
 198             }
 199             return false;
 200         }
 201     };
 202
 203
 204     /**
 205      * The memory-side port extends the base cache master port with
 206      * access functions for functional, atomic and timing snoops.
 207      */
 208     class MemSidePort : public CacheMasterPort
 209     {
 210       private:
 211
 212         /** The cache-specific queue. */
 213         CacheReqPacketQueue _reqQueue;
 214
 215         SnoopRespPacketQueue _snoopRespQueue;
 216
 217         // a pointer to our specific cache implementation
 218         BaseCache *cache;
 219
 220       protected:
 221
 222         virtual void recvTimingSnoopReq(PacketPtr pkt);
 223
 224         virtual bool recvTimingResp(PacketPtr pkt);
 225
 226         virtual Tick recvAtomicSnoop(PacketPtr pkt);
 227
 228         virtual void recvFunctionalSnoop(PacketPtr pkt);
 229
 230       public:
 231
 232         MemSidePort(const std::string &_name, BaseCache *_cache,
 233                     const std::string &_label);
 234     };
 235
 236     /**
 237      * A cache slave port is used for the CPU-side port of the cache,
 238      * and it is basically a simple timing port that uses a transmit
 239      * list for responses to the CPU (or connected master). In
 240      * addition, it has the functionality to block the port for
 241      * incoming requests. If blocked, the port will issue a retry once
 242      * unblocked.
 243      */
  244     class CacheSlavePort : public QueuedSlavePort
  245     {
  246
  247       public:
  248
  249         /** Do not accept any new requests. */
  250         void setBlocked();
  251
  252         /** Return to normal operation and accept new requests. */
  253         void clearBlocked();
  254         /** @return the current value of the blocked flag. */
  255         bool isBlocked() const { return blocked; }
  256
  257       protected:
  258         /** Constructor; defined out of line. */
  259         CacheSlavePort(const std::string &_name, BaseCache *_cache,
  260                        const std::string &_label);
  261
  262         /** A normal packet queue used to store responses. */
  263         RespPacketQueue queue;
  264         /** Flag reported by isBlocked(). */
  265         bool blocked;
  266         /** NOTE(review): presumably set when a retry is owed; confirm in .cc */
  267         bool mustSendRetry;
  268
  269       private:
  270         /** NOTE(review): likely the sendRetryEvent callback; confirm in .cc */
  271         void processSendRetry();
  272
  273         EventFunctionWrapper sendRetryEvent;
  274
  275     };
 276
 277     /**
 278      * The CPU-side port extends the base cache slave port with access
 279      * functions for functional, atomic and timing requests.
 280      */
  281     class CpuSidePort : public CacheSlavePort
  282     {
  283       private:
  284
  285         // Pointer to our specific cache implementation.
  286         BaseCache *cache;
  287
  288       protected:  // every member below is declared with override
  289         virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
  290
  291         virtual bool tryTiming(PacketPtr pkt) override;
  292
  293         virtual bool recvTimingReq(PacketPtr pkt) override;
  294
  295         virtual Tick recvAtomic(PacketPtr pkt) override;
  296
  297         virtual void recvFunctional(PacketPtr pkt) override;
  298
  299         virtual AddrRangeList getAddrRanges() const override;
  300
  301       public:
  302         /** Constructor; defined out of line. */
  303         CpuSidePort(const std::string &_name, BaseCache *_cache,
  304                     const std::string &_label);
  305
  306     };
 307
 308     CpuSidePort cpuSidePort;
 309     MemSidePort memSidePort;
 310
 311   protected:
 312
 313     /** Miss status registers */
 314     MSHRQueue mshrQueue;
 315
 316     /** Write/writeback buffer */
 317     WriteQueue writeBuffer;
 318
 319     /** Tag and data Storage */
 320     BaseTags *tags;
 321
 322     /** Compression method being used. */
 323     Compressor::Base* compressor;
 324
 325     /** Prefetcher */
 326     Prefetcher::Base *prefetcher;
 327
 328     /** To probe when a cache hit occurs */
 329     ProbePointArg<PacketPtr> *ppHit;
 330
 331     /** To probe when a cache miss occurs */
 332     ProbePointArg<PacketPtr> *ppMiss;
 333
 334     /** To probe when a cache fill occurs */
 335     ProbePointArg<PacketPtr> *ppFill;
 336
 337     /**
  338      * The writeAllocator drives optimizations for streaming writes.
 339      * It first determines whether a WriteReq MSHR should be delayed,
 340      * thus ensuring that we wait longer in cases when we are write
 341      * coalescing and allowing all the bytes of the line to be written
 342      * before the MSHR packet is sent downstream. This works in unison
 343      * with the tracking in the MSHR to check if the entire line is
 344      * written. The write mode also affects the behaviour on filling
 345      * any whole-line writes. Normally the cache allocates the line
 346      * when receiving the InvalidateResp, but after seeing enough
 347      * consecutive lines we switch to using the tempBlock, and thus
 348      * end up not allocating the line, and instead turning the
 349      * whole-line write into a writeback straight away.
 350      */
 351     WriteAllocator * const writeAllocator;
 352
 353     /**
 354      * Temporary cache block for occasional transitory use.  We use
 355      * the tempBlock to fill when allocation fails (e.g., when there
 356      * is an outstanding request that accesses the victim block) or
 357      * when we want to avoid allocation (e.g., exclusive caches)
 358      */
 359     TempCacheBlk *tempBlock;
 360
 361     /**
 362      * Upstream caches need this packet until true is returned, so
 363      * hold it for deletion until a subsequent call
 364      */
 365     std::unique_ptr<Packet> pendingDelete;
 366
 367     /**
 368      * Mark a request as in service (sent downstream in the memory
 369      * system), effectively making this MSHR the ordering point.
 370      */
 371     void markInService(MSHR *mshr, bool pending_modified_resp)
 372     {
 373         bool wasFull = mshrQueue.isFull();
 374         mshrQueue.markInService(mshr, pending_modified_resp);
 375
 376         if (wasFull && !mshrQueue.isFull()) {
 377             clearBlocked(Blocked_NoMSHRs);
 378         }
 379     }
 380
 381     void markInService(WriteQueueEntry *entry)
 382     {
 383         bool wasFull = writeBuffer.isFull();
 384         writeBuffer.markInService(entry);
 385
 386         if (wasFull && !writeBuffer.isFull()) {
 387             clearBlocked(Blocked_NoWBBuffers);
 388         }
 389     }
 390
 391     /**
 392      * Determine whether we should allocate on a fill or not. If this
 393      * cache is mostly inclusive with regards to the upstream cache(s)
 394      * we always allocate (for any non-forwarded and cacheable
 395      * requests). In the case of a mostly exclusive cache, we allocate
  396      * on fill if the packet did not come from a cache, thus if we
 397      * are dealing with a whole-line write (the latter behaves much
 398      * like a writeback), the original target packet came from a
 399      * non-caching source, or if we are performing a prefetch or LLSC.
 400      *
 401      * @param cmd Command of the incoming requesting packet
 402      * @return Whether we should allocate on the fill
 403      */
 404     inline bool allocOnFill(MemCmd cmd) const
 405     {
 406         return clusivity == Enums::mostly_incl ||
 407             cmd == MemCmd::WriteLineReq ||
 408             cmd == MemCmd::ReadReq ||
 409             cmd == MemCmd::WriteReq ||
 410             cmd.isPrefetch() ||
 411             cmd.isLLSC();
 412     }
 413
 414     /**
 415      * Regenerate block address using tags.
 416      * Block address regeneration depends on whether we're using a temporary
 417      * block or not.
 418      *
 419      * @param blk The block to regenerate address.
 420      * @return The block's address.
 421      */
 422     Addr regenerateBlkAddr(CacheBlk* blk);
 423
 424     /**
 425      * Calculate latency of accesses that only touch the tag array.
 426      * @sa calculateAccessLatency
 427      *
 428      * @param delay The delay until the packet's metadata is present.
 429      * @param lookup_lat Latency of the respective tag lookup.
  430      * @return The number of cycles that pass due to a tag-only access.
 431      */
 432     Cycles calculateTagOnlyLatency(const uint32_t delay,
 433                                    const Cycles lookup_lat) const;
 434     /**
  435      * Calculate access latency in cycles given a tag lookup latency, and
 436      * whether access was a hit or miss.
 437      *
 438      * @param blk The cache block that was accessed.
 439      * @param delay The delay until the packet's metadata is present.
 440      * @param lookup_lat Latency of the respective tag lookup.
  441      * @return The number of cycles that pass due to a block access.
 442      */
 443     Cycles calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
 444                                   const Cycles lookup_lat) const;
 445
 446     /**
 447      * Does all the processing necessary to perform the provided request.
 448      * @param pkt The memory request to perform.
 449      * @param blk The cache block to be updated.
 450      * @param lat The latency of the access.
 451      * @param writebacks List for any writebacks that need to be performed.
 452      * @return Boolean indicating whether the request was satisfied.
 453      */
 454     virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
 455                         PacketList &writebacks);
 456
  457     /**
  458      * Handle a timing request that hit in the cache
  459      *
  460      * @param pkt The request packet
  461      * @param blk The referenced block
  462      * @param request_time The tick at which the block lookup is complete
  463      */
 464     virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
 465                                     Tick request_time);
 466
  467     /**
  468      * Handle a timing request that missed in the cache
  469      *
  470      * Implementation specific handling for different cache
  471      * implementations
  472      *
  473      * @param pkt The request packet
  474      * @param blk The referenced block
  475      * @param forward_time The tick at which we can process dependent requests
  476      * @param request_time The tick at which the block lookup is complete
  477      */
 478     virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
 479                                      Tick forward_time,
 480                                      Tick request_time) = 0;
 481
  482     /**
  483      * Handle a timing request that missed in the cache
  484      *
  485      * Common functionality across different cache implementations
  486      *
  487      * @param pkt The request packet
  488      * @param mshr Any existing mshr for the referenced cache block
  489      * @param blk The referenced block
  490      * @param forward_time The tick at which we can process dependent requests
  491      * @param request_time The tick at which the block lookup is complete
  492      */
 493     void handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
 494                              Tick forward_time, Tick request_time);
 495
 496     /**
 497      * Performs the access specified by the request.
 498      * @param pkt The request to perform.
 499      */
 500     virtual void recvTimingReq(PacketPtr pkt);
 501
 502     /**
 503      * Handling the special case of uncacheable write responses to
 504      * make recvTimingResp less cluttered.
 505      */
 506     void handleUncacheableWriteResp(PacketPtr pkt);
 507
 508     /**
 509      * Service non-deferred MSHR targets using the received response
 510      *
 511      * Iterates through the list of targets that can be serviced with
 512      * the current response.
 513      *
  514      * @param mshr The MSHR that corresponds to the response
 515      * @param pkt The response packet
 516      * @param blk The reference block
 517      */
 518     virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
 519                                     CacheBlk *blk) = 0;
 520
 521     /**
 522      * Handles a response (cache line fill/write ack) from the bus.
 523      * @param pkt The response packet
 524      */
 525     virtual void recvTimingResp(PacketPtr pkt);
 526
 527     /**
 528      * Snoops bus transactions to maintain coherence.
 529      * @param pkt The current bus transaction.
 530      */
 531     virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
 532
 533     /**
 534      * Handle a snoop response.
 535      * @param pkt Snoop response packet
 536      */
 537     virtual void recvTimingSnoopResp(PacketPtr pkt) = 0;
 538
 539     /**
 540      * Handle a request in atomic mode that missed in this cache
 541      *
 542      * Creates a downstream request, sends it to the memory below and
 543      * handles the response. As we are in atomic mode all operations
 544      * are performed immediately.
 545      *
 546      * @param pkt The packet with the requests
 547      * @param blk The referenced block
 548      * @param writebacks A list with packets for any performed writebacks
 549      * @return Cycles for handling the request
 550      */
 551     virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
 552                                        PacketList &writebacks) = 0;
 553
 554     /**
 555      * Performs the access specified by the request.
 556      * @param pkt The request to perform.
 557      * @return The number of ticks required for the access.
 558      */
 559     virtual Tick recvAtomic(PacketPtr pkt);
 560
 561     /**
 562      * Snoop for the provided request in the cache and return the estimated
 563      * time taken.
 564      * @param pkt The memory request to snoop
 565      * @return The number of ticks required for the snoop.
 566      */
 567     virtual Tick recvAtomicSnoop(PacketPtr pkt) = 0;
 568
 569     /**
 570      * Performs the access specified by the request.
 571      *
 572      * @param pkt The request to perform.
  573      * @param from_cpu_side from the CPU side port or the memory side port
 574      */
 575     virtual void functionalAccess(PacketPtr pkt, bool from_cpu_side);
 576
 577     /**
 578      * Handle doing the Compare and Swap function for SPARC.
 579      */
 580     void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
 581
 582     /**
 583      * Return the next queue entry to service, either a pending miss
 584      * from the MSHR queue, a buffered write from the write buffer, or
 585      * something from the prefetcher. This function is responsible
 586      * for prioritizing among those sources on the fly.
 587      */
 588     QueueEntry* getNextQueueEntry();
 589
 590     /**
 591      * Insert writebacks into the write buffer
 592      */
 593     virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
 594
 595     /**
 596      * Send writebacks down the memory hierarchy in atomic mode
 597      */
 598     virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
 599
 600     /**
 601      * Create an appropriate downstream bus request packet.
 602      *
  603      * Creates a new packet with the request to be sent to the memory
 604      * below, or nullptr if the current request in cpu_pkt should just
 605      * be forwarded on.
 606      *
 607      * @param cpu_pkt The miss packet that needs to be satisfied.
 608      * @param blk The referenced block, can be nullptr.
 609      * @param needs_writable Indicates that the block must be writable
 610      * even if the request in cpu_pkt doesn't indicate that.
 611      * @param is_whole_line_write True if there are writes for the
 612      * whole line
  613      * @return A packet sent to the memory below
 614      */
 615     virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
 616                                        bool needs_writable,
 617                                        bool is_whole_line_write) const = 0;
 618
 619     /**
 620      * Determine if clean lines should be written back or not. In
 621      * cases where a downstream cache is mostly inclusive we likely
 622      * want it to act as a victim cache also for lines that have not
 623      * been modified. Hence, we cannot simply drop the line (or send a
 624      * clean evict), but rather need to send the actual data.
 625      */
 626     const bool writebackClean;
 627
 628     /**
 629      * Writebacks from the tempBlock, resulting on the response path
 630      * in atomic mode, must happen after the call to recvAtomic has
 631      * finished (for the right ordering of the packets). We therefore
 632      * need to hold on to the packets, and have a method and an event
 633      * to send them.
 634      */
 635     PacketPtr tempBlockWriteback;
 636
 637     /**
 638      * Send the outstanding tempBlock writeback. To be called after
 639      * recvAtomic finishes in cases where the block we filled is in
 640      * fact the tempBlock, and now needs to be written back.
 641      */
 642     void writebackTempBlockAtomic() {
 643         assert(tempBlockWriteback != nullptr);
 644         PacketList writebacks{tempBlockWriteback};
 645         doWritebacksAtomic(writebacks);
 646         tempBlockWriteback = nullptr;
 647     }
 648
 649     /**
 650      * An event to writeback the tempBlock after recvAtomic
 651      * finishes. To avoid other calls to recvAtomic getting in
 652      * between, we create this event with a higher priority.
 653      */
 654     EventFunctionWrapper writebackTempBlockAtomicEvent;
 655
 656     /**
  657      * When a block is overwritten, its compression information must be updated,
 658      * and it may need to be recompressed. If the compression size changes, the
 659      * block may either become smaller, in which case there is no side effect,
 660      * or bigger (data expansion; fat write), in which case the block might not
 661      * fit in its current location anymore. If that happens, there are usually
 662      * two options to be taken:
 663      *
 664      * - The co-allocated blocks must be evicted to make room for this block.
 665      *   Simpler, but ignores replacement data.
 666      * - The block itself is moved elsewhere (used in policies where the CF
 667      *   determines the location of the block).
 668      *
 669      * This implementation uses the first approach.
 670      *
 671      * Notice that this is only called for writebacks, which means that L1
 672      * caches (which see regular Writes), do not support compression.
 673      * @sa CompressedTags
 674      *
  675      * @param blk The block to be overwritten.
 676      * @param data A pointer to the data to be compressed (blk's new data).
 677      * @param writebacks List for any writebacks that need to be performed.
 678      * @return Whether operation is successful or not.
 679      */
 680     bool updateCompressionData(CacheBlk *blk, const uint64_t* data,
 681                                PacketList &writebacks);
 682
 683     /**
 684      * Perform any necessary updates to the block and perform any data
 685      * exchange between the packet and the block. The flags of the
 686      * packet are also set accordingly.
 687      *
 688      * @param pkt Request packet from upstream that hit a block
 689      * @param blk Cache block that the packet hit
 690      * @param deferred_response Whether this request originally missed
 691      * @param pending_downgrade Whether the writable flag is to be removed
 692      */
 693     virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
 694                                 bool deferred_response = false,
 695                                 bool pending_downgrade = false);
 696
 697     /**
 698      * Maintain the clusivity of this cache by potentially
 699      * invalidating a block. This method works in conjunction with
 700      * satisfyRequest, but is separate to allow us to handle all MSHR
 701      * targets before potentially dropping a block.
 702      *
 703      * @param from_cache Whether we have dealt with a packet from a cache
 704      * @param blk The block that should potentially be dropped
 705      */
 706     void maintainClusivity(bool from_cache, CacheBlk *blk);
 707
 708     /**
 709      * Try to evict the given blocks. If any of them is a transient eviction,
 710      * that is, the block is present in the MSHR queue all evictions are
 711      * cancelled since handling such cases has not been implemented.
 712      *
 713      * @param evict_blks Blocks marked for eviction.
 714      * @param writebacks List for any writebacks that need to be performed.
 715      * @return False if any of the evicted blocks is in transient state.
 716      */
 717     bool handleEvictions(std::vector<CacheBlk*> &evict_blks,
 718         PacketList &writebacks);
 719
 720     /**
 721      * Handle a fill operation caused by a received packet.
 722      *
 723      * Populates a cache block and handles all outstanding requests for the
 724      * satisfied fill request. This version takes two memory requests. One
 725      * contains the fill data, the other is an optional target to satisfy.
 726      * Note that the reason we return a list of writebacks rather than
 727      * inserting them directly in the write buffer is that this function
 728      * is called by both atomic and timing-mode accesses, and in atomic
 729      * mode we don't mess with the write buffer (we just perform the
 730      * writebacks atomically once the original request is complete).
 731      *
 732      * @param pkt The memory request with the fill data.
 733      * @param blk The cache block if it already exists.
 734      * @param writebacks List for any writebacks that need to be performed.
 735      * @param allocate Whether to allocate a block or use the temp block
 736      * @return Pointer to the new cache block.
 737      */
 738     CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
 739                          PacketList &writebacks, bool allocate);
 740
 741     /**
 742      * Allocate a new block and perform any necessary writebacks
 743      *
 744      * Find a victim block and if necessary prepare writebacks for any
 745      * existing data. May return nullptr if there are no replaceable
 746      * blocks. If a replaceable block is found, it inserts the new block in
 747      * its place. The new block, however, is not set as valid yet.
 748      *
 749      * @param pkt Packet holding the address to update
 750      * @param writebacks A list of writeback packets for the evicted blocks
 751      * @return the allocated block
 752      */
 753     CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
 754     /**
 755      * Evict a cache block.
 756      *
  757      * Performs a writeback if necessary and invalidates the block
 758      *
 759      * @param blk Block to invalidate
 760      * @return A packet with the writeback, can be nullptr
 761      */
 762     M5_NODISCARD virtual PacketPtr evictBlock(CacheBlk *blk) = 0;
 763
 764     /**
 765      * Evict a cache block.
 766      *
  767      * Performs a writeback if necessary and invalidates the block
 768      *
 769      * @param blk Block to invalidate
 770      * @param writebacks Return a list of packets with writebacks
 771      */
 772     void evictBlock(CacheBlk *blk, PacketList &writebacks);
 773
 774     /**
 775      * Invalidate a cache block.
 776      *
 777      * @param blk Block to invalidate
 778      */
 779     void invalidateBlock(CacheBlk *blk);
 780
 781     /**
 782      * Create a writeback request for the given block.
 783      *
 784      * @param blk The block to write back.
 785      * @return The writeback packet for the block.
 786      */
 787     PacketPtr writebackBlk(CacheBlk *blk);
 788
 789     /**
 790      * Create a writeclean request for the given block.
 791      *
 792      * Creates a request that writes the block to the cache below
 793      * without evicting the block from the current cache.
 794      *
 795      * @param blk The block to write clean.
 796      * @param dest The destination of the write clean operation.
 797      * @param id Use the given packet id for the write clean operation.
 798      * @return The generated write clean packet.
 799      */
 800     PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
 801
 802     /**
 803      * Write back dirty blocks in the cache using functional accesses.
 804      */
 805     virtual void memWriteback() override;
 806
 807     /**
 808      * Invalidates all blocks in the cache.
 809      *
 810      * @warning Dirty cache lines will not be written back to
 811      * memory. Make sure to call memWriteback() first if you
 812      * want them to be written to memory.
 813      */
 814     virtual void memInvalidate() override;
 815
 816     /**
 817      * Determine if there are any dirty blocks in the cache.
 818      *
 819      * @return true if at least one block is dirty, false otherwise.
 820      */
 821     bool isDirty() const;
 822
 823     /**
 824      * Determine if an address is in the ranges covered by this
 825      * cache. This is useful to filter snoops.
 826      *
 827      * @param addr Address to check against
 828      *
 829      * @return true if the address in question is in range
 830      */
 831     bool inRange(Addr addr) const;
 832
 833     /**
 834      * Find the next request ready time from among possible sources.
 835      */
 836     Tick nextQueueReadyTime() const;
 837
 838     /** Block size of this cache (the value returned by getBlockSize()). */
 839     const unsigned blkSize;
 840
 841     /**
 842      * The latency of tag lookup of a cache. It occurs when there is
 843      * an access to the cache.
 844      */
 845     const Cycles lookupLatency;
 846
 847     /**
 848      * The latency of data access of a cache. It occurs when there is
 849      * an access to the cache.
 850      */
 851     const Cycles dataLatency;
 852
 853     /**
 854      * This is the forward latency of the cache. It occurs when there
 855      * is a cache miss and a request is forwarded downstream, in
 856      * particular an outbound miss.
 857      */
 858     const Cycles forwardLatency;
 859
 860     /** The latency to fill a cache block. */
 861     const Cycles fillLatency;
 862
 863     /**
 864      * The latency of sending a response to the upper level cache/core
 865      * on a linefill. The responseLatency parameter captures this
 866      * latency.
 867      */
 868     const Cycles responseLatency;
 869
 870     /**
 871      * Whether tags and data are accessed sequentially.
 872      */
 873     const bool sequentialAccess;
 874
 875     /** The number of targets for each MSHR. */
 876     const int numTarget;
 877
 878     /** Do we forward snoops from mem side port through to cpu side port? */
 879     bool forwardSnoops;
 880
 881     /**
 882      * Clusivity with respect to the upstream cache, determining if we
 883      * fill into both this cache and the cache above on a miss. Note
 884      * that we currently do not support strict clusivity policies.
 885      */
 886     const Enums::Clusivity clusivity;
 887
 888     /**
 889      * Is this cache read only, for example the instruction cache, or
 890      * table-walker cache. A cache that is read only should never see
 891      * any writes, and should never get any dirty data (and hence
 892      * never have to do any writebacks).
 893      */
 894     const bool isReadOnly;
 895
 896     /**
 897      * Bit vector of the blocking reasons; cause c uses bit (1 << c).
 898      * @sa #BlockedCause
 899      */
 900     uint8_t blocked;
 901
 902     /** Increasing order number handed to each MSHR/write-buffer allocation. */
 903     uint64_t order;
 904
 905     /** Cycle at which the cache became blocked; used for statistics. */
 906     Cycles blockedCycle;
 907
 908     /** Pointer to the MSHR that has no targets. */
 909     MSHR *noTargetMSHR;
 910
 911     /** Misses left until an exit event is triggered; 0 disables the check. */
 912     Counter missCount;
 913
 914     /**
 915      * The address ranges to which the cache responds on the CPU side.
 916      * Normally this is all possible memory addresses. */
 917     const AddrRangeList addrRanges;
 918
 919   public:
 920     /** System we are currently operating in. */
 921     System *system;
 922
 923     struct CacheCmdStats : public Stats::Group
 924     {
 925         CacheCmdStats(BaseCache &c, const std::string &name);
 926
 927         /**
 928          * Callback to register stats from parent
 929          * CacheStats::regStats(). We can't use the normal flow since
 930          * there is no guaranteed order and CacheStats::regStats()
 931          * needs to rely on these stats being initialised.
 932          */
 933         void regStatsFromParent();
 934
 935         const BaseCache &cache;
 936
 937         /** Number of hits for this command, indexed by master id.
 938             @sa Packet::Command */
 939         Stats::Vector hits;
 940         /** Number of misses for this command, indexed by master id.
 941             @sa Packet::Command */
 942         Stats::Vector misses;
 943         /**
 944          * Total number of cycles per thread/command spent waiting for a miss.
 945          * Used to calculate the average miss latency.
 946          */
 947         Stats::Vector missLatency;
 948         /** The number of accesses per command and thread. */
 949         Stats::Formula accesses;
 950         /** The miss rate per command and thread. */
 951         Stats::Formula missRate;
 952         /** The average miss latency per command and thread. */
 953         Stats::Formula avgMissLatency;
 954         /** Number of misses that hit in the MSHRs per command and thread. */
 955         Stats::Vector mshr_hits;
 956         /** Number of misses that miss in the MSHRs, per command and thread. */
 957         Stats::Vector mshr_misses;
 958         /** Presumably the number of uncacheable accesses (per the name). */
 959         Stats::Vector mshr_uncacheable;
 960         /** Total cycle latency of each MSHR miss, per command and thread. */
 961         Stats::Vector mshr_miss_latency;
 962         /** Presumably the total latency of uncacheable accesses (per the name). */
 963         Stats::Vector mshr_uncacheable_lat;
 964         /** The miss rate in the MSHRs per command and thread. */
 965         Stats::Formula mshrMissRate;
 966         /** The average latency of an MSHR miss, per command and thread. */
 967         Stats::Formula avgMshrMissLatency;
 968         /** Presumably the average latency of an uncacheable access (per name). */
 969         Stats::Formula avgMshrUncacheableLatency;
 970     };
 971
 972     struct CacheStats : public Stats::Group
 973     {
 974         CacheStats(BaseCache &c);
 975
 976         void regStats() override;
 977         /** @return The per-command stats selected by p->cmdToIndex(). */
 978         CacheCmdStats &cmdStats(const PacketPtr p) {
 979             return *cmd[p->cmdToIndex()];
 980         }
 981
 982         const BaseCache &cache;
 983
 984         /** Number of hits for demand accesses. */
 985         Stats::Formula demandHits;
 986         /** Number of hits for all accesses. */
 987         Stats::Formula overallHits;
 988
 989         /** Number of misses for demand accesses. */
 990         Stats::Formula demandMisses;
 991         /** Number of misses for all accesses. */
 992         Stats::Formula overallMisses;
 993
 994         /** Total number of cycles spent waiting for demand misses. */
 995         Stats::Formula demandMissLatency;
 996         /** Total number of cycles spent waiting for all misses. */
 997         Stats::Formula overallMissLatency;
 998
 999         /** The number of demand accesses. */
1000         Stats::Formula demandAccesses;
1001         /** The number of overall accesses. */
1002         Stats::Formula overallAccesses;
1003
1004         /** The miss rate of all demand accesses. */
1005         Stats::Formula demandMissRate;
1006         /** The miss rate for all accesses. */
1007         Stats::Formula overallMissRate;
1008
1009         /** The average miss latency for demand misses. */
1010         Stats::Formula demandAvgMissLatency;
1011         /** The average miss latency for all misses. */
1012         Stats::Formula overallAvgMissLatency;
1013
1014         /** The total number of cycles blocked for each blocked cause. */
1015         Stats::Vector blocked_cycles;
1016         /** The number of times this cache blocked for each blocked cause. */
1017         Stats::Vector blocked_causes;
1018
1019         /** The average number of cycles blocked for each blocked cause. */
1020         Stats::Formula avg_blocked;
1021
1022         /** The number of times a HW-prefetched block is evicted w/o
1023          * reference. */
1024         Stats::Scalar unusedPrefetches;
1025
1026         /** Number of blocks written back per thread. */
1027         Stats::Vector writebacks;
1028
1029         /** Demand misses that hit in the MSHRs. */
1030         Stats::Formula demandMshrHits;
1031         /** Total number of misses that hit in the MSHRs. */
1032         Stats::Formula overallMshrHits;
1033
1034         /** Demand misses that miss in the MSHRs. */
1035         Stats::Formula demandMshrMisses;
1036         /** Total number of misses that miss in the MSHRs. */
1037         Stats::Formula overallMshrMisses;
1038
1039         /** Presumably the total number of uncacheable accesses (per the name). */
1040         Stats::Formula overallMshrUncacheable;
1041
1042         /** Total cycle latency of demand MSHR misses. */
1043         Stats::Formula demandMshrMissLatency;
1044         /** Total cycle latency of overall MSHR misses. */
1045         Stats::Formula overallMshrMissLatency;
1046
1047         /** Presumably the total latency of uncacheable accesses (per the name). */
1048         Stats::Formula overallMshrUncacheableLatency;
1049
1050         /** The demand miss rate in the MSHRs. */
1051         Stats::Formula demandMshrMissRate;
1052         /** The overall miss rate in the MSHRs. */
1053         Stats::Formula overallMshrMissRate;
1054
1055         /** The average latency of a demand MSHR miss. */
1056         Stats::Formula demandAvgMshrMissLatency;
1057         /** The average overall latency of an MSHR miss. */
1058         Stats::Formula overallAvgMshrMissLatency;
1059
1060         /** Presumably the average latency of an uncacheable access (per name). */
1061         Stats::Formula overallAvgMshrUncacheableLatency;
1062
1063         /** Number of replacements of valid blocks. */
1064         Stats::Scalar replacements;
1065
1066         /** Number of data expansions. */
1067         Stats::Scalar dataExpansions;
1068
1069         /** Per-command statistics, indexed by Packet::cmdToIndex(). */
1070         std::vector<std::unique_ptr<CacheCmdStats>> cmd;
1071     } stats;
1072
1073     /** Registers probes (overrides the base-class hook). */
1074     void regProbePoints() override;
1075
1076   public:
1077     BaseCache(const BaseCacheParams *p, unsigned blk_size);
1078     ~BaseCache();
1079
1080     void init() override;
1081     /** @return The port selected by if_name and idx (default InvalidPortID). */
1082     Port &getPort(const std::string &if_name,
1083                   PortID idx=InvalidPortID) override;
1084
1085     /**
1086      * @return The cache block size (blkSize).
1087      */
1088     unsigned getBlockSize() const { return blkSize; }
1089
1090     /** @return The address ranges stored in addrRanges. */
1091     const AddrRangeList &
1092     getAddrRanges() const
1093     {
1094         return addrRanges;
1095     }
1096
1097     /** Allocate an MSHR for pkt; a full MSHR queue blocks the access path.
1098      *  @param sched_send Also schedule a memory-side send at time. */
1099     MSHR *
1100     allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1101     {
1102         const Addr blk_addr = pkt->getBlockAddr(blkSize);
1103         const auto alloc_on_fill = allocOnFill(pkt->cmd);
1104         MSHR *const entry = mshrQueue.allocate(blk_addr, blkSize, pkt, time,
1105                                                order++, alloc_on_fill);
1106         if (mshrQueue.isFull())
1107             setBlocked(static_cast<BlockedCause>(MSHRQueue_MSHRs));
1108
1109         // Only schedule the memory-side send when the caller asked for it.
1110         if (sched_send)
1111             schedMemSideSendEvent(time);
1112         return entry;
1113     }
1114
1115     /**
1116      * Queue a write or clean-evict packet in the write buffer, block the
1117      * cache if that fills the buffer, and schedule a memory-side send.
1118      * @param pkt Packet to buffer; asserted to be a write or CleanEvict
1119      * @param time Send time; grows by the payload delay when compressing
1120      */
1121     void allocateWriteBuffer(PacketPtr pkt, Tick time)
1122     {
1123         // should only see writes or clean evicts here
1124         assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);
1125         const Addr blk_addr = pkt->getBlockAddr(blkSize);
1126
1127         // With a compressor, an eviction's decompression latency sits in
1128         // the payload delay. Consume it here, so the data is always
1129         // stored uncompressed in the write buffer.
1130         if (compressor) {
1131             time += pkt->payloadDelay;
1132             pkt->payloadDelay = 0;
1133         }
1134         // Trace-only hint; the trailing newline keeps trace lines apart.
1135         WriteQueueEntry *wq_entry =
1136             writeBuffer.findMatch(blk_addr, pkt->isSecure());
1137         if (wq_entry && !wq_entry->inService) {
1138             DPRINTF(Cache, "Potential to merge writeback %s\n", pkt->print());
1139         }
1140
1141         writeBuffer.allocate(blk_addr, blkSize, pkt, time, order++);
1142         if (writeBuffer.isFull())
1143             setBlocked(static_cast<BlockedCause>(MSHRQueue_WriteBuffer));
1144         schedMemSideSendEvent(time); // schedule the send
1145     }
1146
1147     /** @return True while at least one blocking cause bit is set. */
1148     bool
1149     isBlocked() const
1150     {
1151         const bool any_cause_set = (blocked != 0);
1152         return any_cause_set;
1153     }
1154
1155     /**
1156      * Record a blocking cause. When the cache was not blocked before, also
1157      * note the current cycle, count the cause and block the CPU-side port.
1158      * @param cause The reason for the cache blocking.
1159      */
1160     void
1161     setBlocked(BlockedCause cause)
1162     {
1163         if (blocked == 0) {
1164             blockedCycle = curCycle();
1165             stats.blocked_causes[cause]++;
1166             cpuSidePort.setBlocked();
1167         }
1168         blocked |= static_cast<uint8_t>(1 << cause);
1169         DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
1170     }
1171
1172     /**
1173      * Drop a blocking cause. Once no cause remains, the blocked time is
1174      * added to the stats and the CPU-side port is unblocked.
1175      * @param cause The newly unblocked cause.
1176      * @warning Calling this function can cause a blocked request on the
1177      * bus to access the cache, which must be ready to handle it.
1178      */
1179     void
1180     clearBlocked(BlockedCause cause)
1181     {
1182         blocked &= static_cast<uint8_t>(~(1 << cause));
1183         DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
1184         if (blocked != 0)
1185             return;
1186         stats.blocked_cycles[cause] += curCycle() - blockedCycle;
1187         cpuSidePort.clearBlocked();
1188     }
1189
1190     /**
1191      * Ask the memory-side port to schedule a send event. An already
1192      * scheduled event may be moved to an earlier time. Once the time
1193      * is reached the port may send a response, a request, or a
1194      * prefetch request.
1195      * @param time The time when to attempt sending a packet.
1196      */
1197     void
1198     schedMemSideSendEvent(Tick time)
1199     {
1200         memSidePort.schedSendEvent(time);
1201     }
1202
1203     /** @return True if tags->findBlock() yields a block for the address. */
1204     bool inCache(Addr addr, bool is_secure) const
1205     { return tags->findBlock(addr, is_secure) != nullptr; }
1206
1207     /**
1208      * @return True if a block is found for the address and it reports
1209      * wasPrefetched(); false when no block is found.
1210      */
1211     bool hasBeenPrefetched(Addr addr, bool is_secure) const {
1212         CacheBlk *const found = tags->findBlock(addr, is_secure);
1213         return found != nullptr && found->wasPrefetched();
1214     }
1215
1216     /** @return True if mshrQueue.findMatch() finds a matching entry. */
1217     bool inMissQueue(Addr addr, bool is_secure) const
1218     { return mshrQueue.findMatch(addr, is_secure) != nullptr; }
1219
1220     /** Count a miss for pkt's master, bump the request's access depth,
1221      *  and exit the simulation loop when missCount counts down to 0. */
1222     void incMissCount(PacketPtr pkt)
1223     {
1224         const auto master_id = pkt->req->masterId();
1225         assert(master_id < system->maxMasters());
1226         stats.cmdStats(pkt).misses[master_id]++;
1227         pkt->req->incAccessDepth();
1228         if (missCount && --missCount == 0)
1229             exitSimLoop("A cache reached the maximum miss count");
1230     }
1231     void incHitCount(PacketPtr pkt) { // count a hit for pkt's master
1232         const auto master_id = pkt->req->masterId();
1233         assert(master_id < system->maxMasters());
1234         stats.cmdStats(pkt).hits[master_id]++;
1235     }
1236
1237     /**
1238      * Checks if the cache is coalescing writes.
1239      *
1240      * @return True if the cache is coalescing writes
1241      */
1242     bool coalesce() const;
1243
1244
1245     /**
1246      * Cache block visitor that writes back dirty cache blocks using
1247      * functional writes.
1248      */
1249     void writebackVisitor(CacheBlk &blk);
1250
1251     /**
1252      * Cache block visitor that invalidates all blocks in the cache.
1253      *
1254      * @warning Dirty cache lines will not be written back to memory.
1255      */
1256     void invalidateVisitor(CacheBlk &blk);
1257
1258     /**
1259      * Take an MSHR, turn it into a suitable downstream packet, and
1260      * send it out. This construct allows a queue entry to choose a suitable
1261      * approach based on its type.
1262      *
1263      * @param mshr The MSHR to turn into a packet and send
1264      * @return True if the port is waiting for a retry
1265      */
1266     virtual bool sendMSHRQueuePacket(MSHR* mshr);
1267
1268     /**
1269      * Similar to sendMSHRQueuePacket, but for a write-queue entry
1270      * instead. Create the packet, send it, and if successful also
1271      * mark the entry in service.
1272      *
1273      * @param wq_entry The write-queue entry to turn into a packet and send
1274      * @return True if the port is waiting for a retry
1275      */
1276     bool sendWriteQueuePacket(WriteQueueEntry* wq_entry);
1277
1278     /**
1279      * Serialize the state of the caches.
1280      *
1281      * We currently don't support checkpointing cache state, so this panics.
1282      */
1283     void serialize(CheckpointOut &cp) const override;
1284     void unserialize(CheckpointIn &cp) override;
1285 };
1286
1287 /**
1288  * The write allocator inspects write packets and detects streaming
1289  * patterns. The write allocator supports a single stream where writes
1290  * are expected to access consecutive locations and keeps track of the
1291  * size of the area covered by the consecutive writes in byteCount.
1292  *
1293  * 1) When byteCount has surpassed the coalesceLimit the mode
1294  * switches from ALLOCATE to COALESCE where writes should be delayed
1295  * until the whole block is written at which point a single packet
1296  * (whole line write) can service them.
1297  *
1298  * 2) When byteCount has also exceeded the noAllocateLimit (whole
1299  * line) we switch to NO_ALLOCATE when writes should not allocate in
1300  * the cache but rather send a whole line write to the memory below.
1301  */
1302 class WriteAllocator : public SimObject {
1303   public:
1304     WriteAllocator(const WriteAllocatorParams *p) :
1305         SimObject(p),
1306         coalesceLimit(p->coalesce_limit * p->block_size),
1307         noAllocateLimit(p->no_allocate_limit * p->block_size),
1308         delayThreshold(p->delay_threshold)
1309     {
1310         reset();
1311     }
1312
1313     /**
1314      * Should writes be coalesced? This is true in every mode other
1315      * than ALLOCATE, i.e. in both COALESCE and NO_ALLOCATE.
1316      *
1317      * @return true if the cache should coalesce writes.
1318      */
1319     bool coalesce() const {
1320         return mode != WriteMode::ALLOCATE;
1321     }
1322
1323     /**
1324      * Should writes allocate? This is true unless the mode is NO_ALLOCATE.
1325      *
1326      * @return true if the cache should allocate for writes.
1327      */
1328     bool allocate() const {
1329         return mode != WriteMode::NO_ALLOCATE;
1330     }
1331
1332     /**
1333      * Reset the write allocator state, meaning that it allocates for
1334      * writes and has not recorded any information about qualifying
1335      * writes that might trigger a switch to coalescing and later no
1336      * allocation. The per-block delay counters are left untouched.
1337      */
1338     void reset() {
1339         mode = WriteMode::ALLOCATE;
1340         byteCount = 0;
1341         nextAddr = 0;
1342     }
1343
1344     /**
1345      * Assess whether we need to delay the current write. A positive delay
1346      * counter is decremented; no counter is created for untracked blocks.
1347      * @param blk_addr The block address the packet writes to
1348      * @return true if the current packet should be delayed
1349      */
1350     bool delay(Addr blk_addr) {
1351         const auto it = delayCtr.find(blk_addr);
1352         if (it == delayCtr.end() || it->second <= 0) {
1353             return false;
1354         }
1355         --it->second;
1356         return true;
1357     }
1358
1359     /**
1360      * Clear the delay counter for the input block.
1361      *
1362      * @param blk_addr The accessed cache block
1363      */
1364     void resetDelay(Addr blk_addr) {
1365         delayCtr.erase(blk_addr);
1366     }
1367
1368     /**
1369      * Update the write mode based on the current write
1370      * packet. This method compares the packet's address with any
1371      * current stream, and updates the tracking and the mode
1372      * accordingly.
1373      *
1374      * @param write_addr Start address of the write request
1375      * @param write_size Size of the write request
1376      * @param blk_addr The block address that this packet writes to
1377      */
1378     void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
1379
1380   private:
1381     /**
1382      * The current mode for write coalescing and allocation, either
1383      * normal operation (ALLOCATE), write coalescing (COALESCE), or
1384      * write coalescing without allocation (NO_ALLOCATE).
1385      */
1386     enum class WriteMode : char {
1387         ALLOCATE,
1388         COALESCE,
1389         NO_ALLOCATE,
1390     };
1391     WriteMode mode;
1392
1393     /** Address to match writes against to detect streams. */
1394     Addr nextAddr;
1395
1396     /**
1397      * Bytes written contiguously. Saturating once we no longer
1398      * allocate.
1399      */
1400     uint32_t byteCount;
1401
1402     /**
1403      * Limits for when to switch between the different write modes.
1404      */
1405     const uint32_t coalesceLimit;
1406     const uint32_t noAllocateLimit;
1407     /**
1408      * The number of times the allocator will delay a WriteReq MSHR.
1409      */
1410     const uint32_t delayThreshold;
1411
1412     /**
1413      * Keep track of the number of times the allocator may still delay
1414      * a WriteReq MSHR, keyed by block address.
1415      */
1416     std::unordered_map<Addr, Counter> delayCtr;
1417 };
1418
1419 #endif //__MEM_CACHE_BASE_HH__