2 * Copyright (c) 2012-2013, 2015-2016, 2018-2019 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2003-2005 The Regents of The University of Michigan
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * Declares a basic cache interface BaseCache.
46 #ifndef __MEM_CACHE_BASE_HH__
47 #define __MEM_CACHE_BASE_HH__
53 #include "base/addr_range.hh"
54 #include "base/statistics.hh"
55 #include "base/trace.hh"
56 #include "base/types.hh"
57 #include "debug/Cache.hh"
58 #include "debug/CachePort.hh"
59 #include "enums/Clusivity.hh"
60 #include "mem/cache/cache_blk.hh"
61 #include "mem/cache/compressors/base.hh"
62 #include "mem/cache/mshr_queue.hh"
63 #include "mem/cache/tags/base.hh"
64 #include "mem/cache/write_queue.hh"
65 #include "mem/cache/write_queue_entry.hh"
66 #include "mem/packet.hh"
67 #include "mem/packet_queue.hh"
68 #include "mem/qport.hh"
69 #include "mem/request.hh"
70 #include "params/WriteAllocator.hh"
71 #include "sim/clocked_object.hh"
72 #include "sim/eventq.hh"
73 #include "sim/probe/probe.hh"
74 #include "sim/serialize.hh"
75 #include "sim/sim_exit.hh"
76 #include "sim/system.hh"
78 namespace Prefetcher {
84 struct BaseCacheParams;
87 * A basic cache interface. Implements some common functions for speed.
89 class BaseCache : public ClockedObject
93 * Indexes to enumerate the MSHR queues.
102 * Reasons for caches to be blocked.
105 Blocked_NoMSHRs = MSHRQueue_MSHRs,
106 Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
112 * A data contents update is composed of the updated block's address,
113 * the old contents, and the new contents.
118 /** The updated block's address. */
120 /** Whether the block belongs to the secure address space. */
122 /** The stale data contents. If zero-sized this update is a fill. */
123 std::vector<uint64_t> oldData;
124 /** The new data contents. If zero-sized this is an invalidation. */
125 std::vector<uint64_t> newData;
127 DataUpdate(Addr _addr, bool is_secure)
128 : addr(_addr), isSecure(is_secure), oldData(), newData()
136 * A cache request port is used for the memory-side port of the
137 * cache, and in addition to the basic timing port that only sends
138 * response packets through a transmit list, it also offers the
139 * ability to schedule and send request packets (requests &
140 * writebacks). The send event is scheduled through schedSendEvent,
141 * and the sendDeferredPacket of the timing port is modified to
142 * consider both the transmit list and the requests from the MSHR.
144 class CacheRequestPort : public QueuedRequestPort
150 * Schedule a send of a request packet (from the MSHR). Note
151 * that we could already have a retry outstanding.
153 void schedSendEvent(Tick time)
155 DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
// Delegate to the request packet queue, which owns the actual
// send event and handles any outstanding retry.
156 reqQueue.schedSendEvent(time);
161 CacheRequestPort(const std::string &_name, BaseCache *_cache,
162 ReqPacketQueue &_reqQueue,
163 SnoopRespPacketQueue &_snoopRespQueue) :
164 QueuedRequestPort(_name, _cache, _reqQueue, _snoopRespQueue)
168 * Memory-side port always snoops.
170 * @return always true
172 virtual bool isSnooping() const { return true; }
176 * Override the default behaviour of sendDeferredPacket to enable
177 * the memory-side cache port to also send requests based on the
178 * current MSHR status. This queue has a pointer to our specific
179 * cache implementation and is used by the MemSidePort.
181 class CacheReqPacketQueue : public ReqPacketQueue
187 SnoopRespPacketQueue &snoopRespQueue;
191 CacheReqPacketQueue(BaseCache &cache, RequestPort &port,
192 SnoopRespPacketQueue &snoop_resp_queue,
193 const std::string &label) :
194 ReqPacketQueue(cache, port, label), cache(cache),
195 snoopRespQueue(snoop_resp_queue) { }
198 * Override the normal sendDeferredPacket and do not only
199 * consider the transmit list (used for responses), but also
202 virtual void sendDeferredPacket();
205 * Check if there is a conflicting snoop response about to be
206 * sent out, and if so simply stall any requests, and schedule
207 * a send event at the same time as the next snoop response is
210 * @param pkt The packet to check for conflicts against.
212 bool checkConflictingSnoop(const PacketPtr pkt)
// A deferred snoop response that overlaps this packet's cache
// line must go out first; stall the request if one is pending.
214 if (snoopRespQueue.checkConflict(pkt, cache.blkSize)) {
215 DPRINTF(CachePort, "Waiting for snoop response to be "
// Retry the request send at the time the conflicting snoop
// response becomes ready.
217 Tick when = snoopRespQueue.deferredPacketReadyTime();
218 schedSendEvent(when);
227 * The memory-side port extends the base cache request port with
228 * access functions for functional, atomic and timing snoops.
230 class MemSidePort : public CacheRequestPort
234 /** The cache-specific queue. */
235 CacheReqPacketQueue _reqQueue;
237 SnoopRespPacketQueue _snoopRespQueue;
239 // a pointer to our specific cache implementation
244 virtual void recvTimingSnoopReq(PacketPtr pkt);
246 virtual bool recvTimingResp(PacketPtr pkt);
248 virtual Tick recvAtomicSnoop(PacketPtr pkt);
250 virtual void recvFunctionalSnoop(PacketPtr pkt);
254 MemSidePort(const std::string &_name, BaseCache *_cache,
255 const std::string &_label);
259 * A cache response port is used for the CPU-side port of the cache,
260 * and it is basically a simple timing port that uses a transmit
261 * list for responses to the CPU (or connected requestor). In
262 * addition, it has the functionality to block the port for
263 * incoming requests. If blocked, the port will issue a retry once
266 class CacheResponsePort : public QueuedResponsePort
271 /** Do not accept any new requests. */
274 /** Return to normal operation and accept new requests. */
277 bool isBlocked() const { return blocked; }
281 CacheResponsePort(const std::string &_name, BaseCache *_cache,
282 const std::string &_label);
284 /** A normal packet queue used to store responses. */
285 RespPacketQueue queue;
293 void processSendRetry();
295 EventFunctionWrapper sendRetryEvent;
300 * The CPU-side port extends the base cache response port with access
301 * functions for functional, atomic and timing requests.
303 class CpuSidePort : public CacheResponsePort
307 // a pointer to our specific cache implementation
311 virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
313 virtual bool tryTiming(PacketPtr pkt) override;
315 virtual bool recvTimingReq(PacketPtr pkt) override;
317 virtual Tick recvAtomic(PacketPtr pkt) override;
319 virtual void recvFunctional(PacketPtr pkt) override;
321 virtual AddrRangeList getAddrRanges() const override;
325 CpuSidePort(const std::string &_name, BaseCache *_cache,
326 const std::string &_label);
330 CpuSidePort cpuSidePort;
331 MemSidePort memSidePort;
335 /** Miss status registers */
338 /** Write/writeback buffer */
339 WriteQueue writeBuffer;
341 /** Tag and data Storage */
344 /** Compression method being used. */
345 Compressor::Base* compressor;
348 Prefetcher::Base *prefetcher;
350 /** To probe when a cache hit occurs */
351 ProbePointArg<PacketPtr> *ppHit;
353 /** To probe when a cache miss occurs */
354 ProbePointArg<PacketPtr> *ppMiss;
356 /** To probe when a cache fill occurs */
357 ProbePointArg<PacketPtr> *ppFill;
360 * To probe when the contents of a block are updated. Content updates
361 * include data fills, overwrites, and invalidations, which means that
362 * this probe partially overlaps with other probes.
364 ProbePointArg<DataUpdate> *ppDataUpdate;
367 * The writeAllocator drives optimizations for streaming writes.
368 * It first determines whether a WriteReq MSHR should be delayed,
369 * thus ensuring that we wait longer in cases when we are write
370 * coalescing and allowing all the bytes of the line to be written
371 * before the MSHR packet is sent downstream. This works in unison
372 * with the tracking in the MSHR to check if the entire line is
373 * written. The write mode also affects the behaviour on filling
374 * any whole-line writes. Normally the cache allocates the line
375 * when receiving the InvalidateResp, but after seeing enough
376 * consecutive lines we switch to using the tempBlock, and thus
377 * end up not allocating the line, and instead turning the
378 * whole-line write into a writeback straight away.
380 WriteAllocator * const writeAllocator;
383 * Temporary cache block for occasional transitory use. We use
384 * the tempBlock to fill when allocation fails (e.g., when there
385 * is an outstanding request that accesses the victim block) or
386 * when we want to avoid allocation (e.g., exclusive caches)
388 TempCacheBlk *tempBlock;
391 * Upstream caches need this packet until true is returned, so
392 * hold it for deletion until a subsequent call
394 std::unique_ptr<Packet> pendingDelete;
397 * Mark a request as in service (sent downstream in the memory
398 * system), effectively making this MSHR the ordering point.
400 void markInService(MSHR *mshr, bool pending_modified_resp)
// Record fullness before marking, so we can detect the
// full -> not-full transition below.
402 bool wasFull = mshrQueue.isFull();
403 mshrQueue.markInService(mshr, pending_modified_resp);
// If marking this MSHR in service freed an entry, the cache can
// accept new misses again.
405 if (wasFull && !mshrQueue.isFull()) {
406 clearBlocked(Blocked_NoMSHRs);
410 void markInService(WriteQueueEntry *entry)
// Record fullness before marking, so we can detect the
// full -> not-full transition below.
412 bool wasFull = writeBuffer.isFull();
413 writeBuffer.markInService(entry);
// If servicing this entry freed a write buffer slot, unblock the
// cache for new writebacks.
415 if (wasFull && !writeBuffer.isFull()) {
416 clearBlocked(Blocked_NoWBBuffers);
421 * Determine whether we should allocate on a fill or not. If this
422 * cache is mostly inclusive with regards to the upstream cache(s)
423 * we always allocate (for any non-forwarded and cacheable
424 * requests). In the case of a mostly exclusive cache, we allocate
425 * on fill if the packet did not come from a cache, thus if we:
426 * are dealing with a whole-line write (the latter behaves much
427 * like a writeback), the original target packet came from a
428 * non-caching source, or if we are performing a prefetch or LLSC.
430 * @param cmd Command of the incoming requesting packet
431 * @return Whether we should allocate on the fill
433 inline bool allocOnFill(MemCmd cmd) const
// Mostly-inclusive caches always allocate on fill; otherwise
// allocate only for commands whose original request came from a
// non-caching source (see the comment block above for rationale).
435 return clusivity == Enums::mostly_incl ||
436 cmd == MemCmd::WriteLineReq ||
437 cmd == MemCmd::ReadReq ||
438 cmd == MemCmd::WriteReq ||
444 * Regenerate block address using tags.
445 * Block address regeneration depends on whether we're using a temporary
448 * @param blk The block to regenerate address.
449 * @return The block's address.
451 Addr regenerateBlkAddr(CacheBlk* blk);
454 * Calculate latency of accesses that only touch the tag array.
455 * @sa calculateAccessLatency
457 * @param delay The delay until the packet's metadata is present.
458 * @param lookup_lat Latency of the respective tag lookup.
459 * @return The number of ticks that pass due to a tag-only access.
461 Cycles calculateTagOnlyLatency(const uint32_t delay,
462 const Cycles lookup_lat) const;
464 * Calculate access latency in ticks given a tag lookup latency, and
465 * whether access was a hit or miss.
467 * @param blk The cache block that was accessed.
468 * @param delay The delay until the packet's metadata is present.
469 * @param lookup_lat Latency of the respective tag lookup.
470 * @return The number of ticks that pass due to a block access.
472 Cycles calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
473 const Cycles lookup_lat) const;
476 * Does all the processing necessary to perform the provided request.
477 * @param pkt The memory request to perform.
478 * @param blk The cache block to be updated.
479 * @param lat The latency of the access.
480 * @param writebacks List for any writebacks that need to be performed.
481 * @return Boolean indicating whether the request was satisfied.
483 virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
484 PacketList &writebacks);
487 * Handle a timing request that hit in the cache
489 * @param pkt The request packet
490 * @param blk The referenced block
491 * @param request_time The tick at which the block lookup is complete
493 virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
497 * Handle a timing request that missed in the cache
499 * Implementation specific handling for different cache
502 * @param pkt The request packet
503 * @param blk The referenced block
504 * @param forward_time The tick at which we can process dependent requests
505 * @param request_time The tick at which the block lookup is complete
507 virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
509 Tick request_time) = 0;
512 * Handle a timing request that missed in the cache
514 * Common functionality across different cache implementations
516 * @param pkt The request packet
517 * @param blk The referenced block
518 * @param mshr Any existing mshr for the referenced cache block
519 * @param forward_time The tick at which we can process dependent requests
520 * @param request_time The tick at which the block lookup is complete
522 void handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
523 Tick forward_time, Tick request_time);
526 * Performs the access specified by the request.
527 * @param pkt The request to perform.
529 virtual void recvTimingReq(PacketPtr pkt);
532 * Handling the special case of uncacheable write responses to
533 * make recvTimingResp less cluttered.
535 void handleUncacheableWriteResp(PacketPtr pkt);
538 * Service non-deferred MSHR targets using the received response
540 * Iterates through the list of targets that can be serviced with
541 * the current response.
543 * @param mshr The MSHR that corresponds to the response
544 * @param pkt The response packet
545 * @param blk The reference block
547 virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
551 * Handles a response (cache line fill/write ack) from the bus.
552 * @param pkt The response packet
554 virtual void recvTimingResp(PacketPtr pkt);
557 * Snoops bus transactions to maintain coherence.
558 * @param pkt The current bus transaction.
560 virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
563 * Handle a snoop response.
564 * @param pkt Snoop response packet
566 virtual void recvTimingSnoopResp(PacketPtr pkt) = 0;
569 * Handle a request in atomic mode that missed in this cache
571 * Creates a downstream request, sends it to the memory below and
572 * handles the response. As we are in atomic mode all operations
573 * are performed immediately.
575 * @param pkt The packet with the requests
576 * @param blk The referenced block
577 * @param writebacks A list with packets for any performed writebacks
578 * @return Cycles for handling the request
580 virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
581 PacketList &writebacks) = 0;
584 * Performs the access specified by the request.
585 * @param pkt The request to perform.
586 * @return The number of ticks required for the access.
588 virtual Tick recvAtomic(PacketPtr pkt);
591 * Snoop for the provided request in the cache and return the estimated
593 * @param pkt The memory request to snoop
594 * @return The number of ticks required for the snoop.
596 virtual Tick recvAtomicSnoop(PacketPtr pkt) = 0;
599 * Performs the access specified by the request.
601 * @param pkt The request to perform.
602 * @param from_cpu_side Whether the access comes from the CPU side port (true) or the memory side port (false)
604 virtual void functionalAccess(PacketPtr pkt, bool from_cpu_side);
607 * Update the data contents of a block. When no packet is provided no
608 * data will be written to the block, which means that this was likely
609 * triggered by an invalidation.
611 * @param blk The block being updated.
612 * @param cpkt The packet containing the new data.
613 * @param has_old_data Whether this block had data previously.
615 void updateBlockData(CacheBlk *blk, const PacketPtr cpkt,
619 * Handle doing the Compare and Swap function for SPARC.
621 void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
624 * Return the next queue entry to service, either a pending miss
625 * from the MSHR queue, a buffered write from the write buffer, or
626 * something from the prefetcher. This function is responsible
627 * for prioritizing among those sources on the fly.
629 QueueEntry* getNextQueueEntry();
632 * Insert writebacks into the write buffer
634 virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
637 * Send writebacks down the memory hierarchy in atomic mode
639 virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
642 * Create an appropriate downstream bus request packet.
644 * Creates a new packet with the request to be send to the memory
645 * below, or nullptr if the current request in cpu_pkt should just
648 * @param cpu_pkt The miss packet that needs to be satisfied.
649 * @param blk The referenced block, can be nullptr.
650 * @param needs_writable Indicates that the block must be writable
651 * even if the request in cpu_pkt doesn't indicate that.
652 * @param is_whole_line_write True if there are writes for the
654 * @return A packet send to the memory below
656 virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
658 bool is_whole_line_write) const = 0;
661 * Determine if clean lines should be written back or not. In
662 * cases where a downstream cache is mostly inclusive we likely
663 * want it to act as a victim cache also for lines that have not
664 * been modified. Hence, we cannot simply drop the line (or send a
665 * clean evict), but rather need to send the actual data.
667 const bool writebackClean;
670 * Writebacks from the tempBlock, resulting on the response path
671 * in atomic mode, must happen after the call to recvAtomic has
672 * finished (for the right ordering of the packets). We therefore
673 * need to hold on to the packets, and have a method and an event
676 PacketPtr tempBlockWriteback;
679 * Send the outstanding tempBlock writeback. To be called after
680 * recvAtomic finishes in cases where the block we filled is in
681 * fact the tempBlock, and now needs to be written back.
683 void writebackTempBlockAtomic() {
// There must be a writeback pending when this event fires.
684 assert(tempBlockWriteback != nullptr);
685 PacketList writebacks{tempBlockWriteback};
686 doWritebacksAtomic(writebacks);
// Clear the pending pointer so a new tempBlock writeback can be
// scheduled by a later recvAtomic.
687 tempBlockWriteback = nullptr;
691 * An event to writeback the tempBlock after recvAtomic
692 * finishes. To avoid other calls to recvAtomic getting in
693 * between, we create this event with a higher priority.
695 EventFunctionWrapper writebackTempBlockAtomicEvent;
698 * When a block is overwritten, its compression information must be updated,
699 * and it may need to be recompressed. If the compression size changes, the
700 * block may either become smaller, in which case there is no side effect,
701 * or bigger (data expansion; fat write), in which case the block might not
702 * fit in its current location anymore. If that happens, there are usually
703 * two options to be taken:
705 * - The co-allocated blocks must be evicted to make room for this block.
706 * Simpler, but ignores replacement data.
707 * - The block itself is moved elsewhere (used in policies where the CF
708 * determines the location of the block).
710 * This implementation uses the first approach.
712 * Notice that this is only called for writebacks, which means that L1
713 * caches (which see regular Writes), do not support compression.
716 * @param blk The block to be overwritten.
717 * @param data A pointer to the data to be compressed (blk's new data).
718 * @param writebacks List for any writebacks that need to be performed.
719 * @return Whether operation is successful or not.
721 bool updateCompressionData(CacheBlk *&blk, const uint64_t* data,
722 PacketList &writebacks);
725 * Perform any necessary updates to the block and perform any data
726 * exchange between the packet and the block. The flags of the
727 * packet are also set accordingly.
729 * @param pkt Request packet from upstream that hit a block
730 * @param blk Cache block that the packet hit
731 * @param deferred_response Whether this request originally missed
732 * @param pending_downgrade Whether the writable flag is to be removed
734 virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
735 bool deferred_response = false,
736 bool pending_downgrade = false);
739 * Maintain the clusivity of this cache by potentially
740 * invalidating a block. This method works in conjunction with
741 * satisfyRequest, but is separate to allow us to handle all MSHR
742 * targets before potentially dropping a block.
744 * @param from_cache Whether we have dealt with a packet from a cache
745 * @param blk The block that should potentially be dropped
747 void maintainClusivity(bool from_cache, CacheBlk *blk);
750 * Try to evict the given blocks. If any of them is a transient eviction,
751 * that is, the block is present in the MSHR queue all evictions are
752 * cancelled since handling such cases has not been implemented.
754 * @param evict_blks Blocks marked for eviction.
755 * @param writebacks List for any writebacks that need to be performed.
756 * @return False if any of the evicted blocks is in transient state.
758 bool handleEvictions(std::vector<CacheBlk*> &evict_blks,
759 PacketList &writebacks);
762 * Handle a fill operation caused by a received packet.
764 * Populates a cache block and handles all outstanding requests for the
765 * satisfied fill request. This version takes two memory requests. One
766 * contains the fill data, the other is an optional target to satisfy.
767 * Note that the reason we return a list of writebacks rather than
768 * inserting them directly in the write buffer is that this function
769 * is called by both atomic and timing-mode accesses, and in atomic
770 * mode we don't mess with the write buffer (we just perform the
771 * writebacks atomically once the original request is complete).
773 * @param pkt The memory request with the fill data.
774 * @param blk The cache block if it already exists.
775 * @param writebacks List for any writebacks that need to be performed.
776 * @param allocate Whether to allocate a block or use the temp block
777 * @return Pointer to the new cache block.
779 CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
780 PacketList &writebacks, bool allocate);
783 * Allocate a new block and perform any necessary writebacks
785 * Find a victim block and if necessary prepare writebacks for any
786 * existing data. May return nullptr if there are no replaceable
787 * blocks. If a replaceable block is found, it inserts the new block in
788 * its place. The new block, however, is not set as valid yet.
790 * @param pkt Packet holding the address to update
791 * @param writebacks A list of writeback packets for the evicted blocks
792 * @return the allocated block
794 CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
796 * Evict a cache block.
798 * Performs a writeback if necessary and invalidates the block
800 * @param blk Block to invalidate
801 * @return A packet with the writeback, can be nullptr
803 M5_NODISCARD virtual PacketPtr evictBlock(CacheBlk *blk) = 0;
806 * Evict a cache block.
808 * Performs a writeback if necessary and invalidates the block
810 * @param blk Block to invalidate
811 * @param writebacks Return a list of packets with writebacks
813 void evictBlock(CacheBlk *blk, PacketList &writebacks);
816 * Invalidate a cache block.
818 * @param blk Block to invalidate
820 void invalidateBlock(CacheBlk *blk);
823 * Create a writeback request for the given block.
825 * @param blk The block to writeback.
826 * @return The writeback request for the block.
828 PacketPtr writebackBlk(CacheBlk *blk);
831 * Create a writeclean request for the given block.
833 * Creates a request that writes the block to the cache below
834 * without evicting the block from the current cache.
836 * @param blk The block to write clean.
837 * @param dest The destination of the write clean operation.
838 * @param id Use the given packet id for the write clean operation.
839 * @return The generated write clean packet.
841 PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
844 * Write back dirty blocks in the cache using functional accesses.
846 virtual void memWriteback() override;
849 * Invalidates all blocks in the cache.
851 * @warn Dirty cache lines will not be written back to
852 * memory. Make sure to call memWriteback() first if you
853 * want them written to memory.
855 virtual void memInvalidate() override;
858 * Determine if there are any dirty blocks in the cache.
860 * @return true if at least one block is dirty, false otherwise.
862 bool isDirty() const;
865 * Determine if an address is in the ranges covered by this
866 * cache. This is useful to filter snoops.
868 * @param addr Address to check against
870 * @return If the address in question is in range
872 bool inRange(Addr addr) const;
875 * Find next request ready time from among possible sources.
877 Tick nextQueueReadyTime() const;
879 /** Block size of this cache */
880 const unsigned blkSize;
883 * The latency of tag lookup of a cache. It occurs when there is
884 * an access to the cache.
886 const Cycles lookupLatency;
889 * The latency of data access of a cache. It occurs when there is
890 * an access to the cache.
892 const Cycles dataLatency;
895 * This is the forward latency of the cache. It occurs when there
896 * is a cache miss and a request is forwarded downstream, in
897 * particular an outbound miss.
899 const Cycles forwardLatency;
901 /** The latency to fill a cache block */
902 const Cycles fillLatency;
905 * The latency of sending response to its upper level cache/core on
906 * a linefill. The responseLatency parameter captures this
909 const Cycles responseLatency;
912 * Whether tags and data are accessed sequentially.
914 const bool sequentialAccess;
916 /** The number of targets for each MSHR. */
919 /** Do we forward snoops from mem side port through to cpu side port? */
923 * Clusivity with respect to the upstream cache, determining if we
924 * fill into both this cache and the cache above on a miss. Note
925 * that we currently do not support strict clusivity policies.
927 const Enums::Clusivity clusivity;
930 * Is this cache read only, for example the instruction cache, or
931 * table-walker cache. A cache that is read only should never see
932 * any writes, and should never get any dirty data (and hence
933 * never have to do any writebacks).
935 const bool isReadOnly;
938 * when a data expansion of a compressed block happens it will not be
939 * able to co-allocate where it is at anymore. If true, the replacement
940 * policy is called to chose a new location for the block. Otherwise,
941 * all co-allocated blocks are evicted.
943 const bool replaceExpansions;
946 * Similar to data expansions, after a block improves its compression,
947 * it may need to be moved elsewhere compatible with the new compression
948 * factor, or, if not required by the compaction method, it may be moved
949 * to co-allocate with an existing block and thus free an entry.
951 const bool moveContractions;
954 * Bit vector of the blocking reasons for the access path.
959 /** Increasing order number assigned to each incoming request. */
962 /** Stores time the cache blocked for statistics. */
965 /** Pointer to the MSHR that has no targets. */
968 /** The number of misses to trigger an exit event. */
972 * The address range to which the cache responds on the CPU side.
973 * Normally this is all possible memory addresses. */
974 const AddrRangeList addrRanges;
977 /** System we are currently operating in. */
980 struct CacheCmdStats : public Stats::Group
982 CacheCmdStats(BaseCache &c, const std::string &name);
985 * Callback to register stats from parent
986 * CacheStats::regStats(). We can't use the normal flow since
987 * there is no guaranteed order and CacheStats::regStats()
988 * needs to rely on these stats being initialised.
990 void regStatsFromParent();
992 const BaseCache &cache;
994 /** Number of hits per thread for each type of command.
995 @sa Packet::Command */
997 /** Number of misses per thread for each type of command.
998 @sa Packet::Command */
999 Stats::Vector misses;
1001 * Total number of cycles per thread/command spent waiting for a miss.
1002 * Used to calculate the average miss latency.
1004 Stats::Vector missLatency;
1005 /** The number of accesses per command and thread. */
1006 Stats::Formula accesses;
1007 /** The miss rate per command and thread. */
1008 Stats::Formula missRate;
1009 /** The average miss latency per command and thread. */
1010 Stats::Formula avgMissLatency;
1011 /** Number of misses that hit in the MSHRs per command and thread. */
1012 Stats::Vector mshrHits;
1013 /** Number of misses that miss in the MSHRs, per command and thread. */
1014 Stats::Vector mshrMisses;
1015 /** Number of misses that miss in the MSHRs, per command and thread. */
1016 Stats::Vector mshrUncacheable;
1017 /** Total cycle latency of each MSHR miss, per command and thread. */
1018 Stats::Vector mshrMissLatency;
1019 /** Total cycle latency of each MSHR miss, per command and thread. */
1020 Stats::Vector mshrUncacheableLatency;
1021 /** The miss rate in the MSHRs pre command and thread. */
1022 Stats::Formula mshrMissRate;
1023 /** The average latency of an MSHR miss, per command and thread. */
1024 Stats::Formula avgMshrMissLatency;
1025 /** The average latency of an MSHR miss, per command and thread. */
1026 Stats::Formula avgMshrUncacheableLatency;
1029 struct CacheStats : public Stats::Group
1031 CacheStats(BaseCache &c);
1033 void regStats() override;
// Look up the per-command statistics group for this packet,
// indexed by the packet's command.
1035 CacheCmdStats &cmdStats(const PacketPtr p) {
1036 return *cmd[p->cmdToIndex()];
1039 const BaseCache &cache;
1041 /** Number of hits for demand accesses. */
1042 Stats::Formula demandHits;
1043 /** Number of hit for all accesses. */
1044 Stats::Formula overallHits;
1046 /** Number of misses for demand accesses. */
1047 Stats::Formula demandMisses;
1048 /** Number of misses for all accesses. */
1049 Stats::Formula overallMisses;
1051 /** Total number of cycles spent waiting for demand misses. */
1052 Stats::Formula demandMissLatency;
1053 /** Total number of cycles spent waiting for all misses. */
1054 Stats::Formula overallMissLatency;
1056 /** The number of demand accesses. */
1057 Stats::Formula demandAccesses;
1058 /** The number of overall accesses. */
1059 Stats::Formula overallAccesses;
1061 /** The miss rate of all demand accesses. */
1062 Stats::Formula demandMissRate;
1063 /** The miss rate for all accesses. */
1064 Stats::Formula overallMissRate;
1066 /** The average miss latency for demand misses. */
1067 Stats::Formula demandAvgMissLatency;
1068 /** The average miss latency for all misses. */
1069 Stats::Formula overallAvgMissLatency;
1071 /** The total number of cycles blocked for each blocked cause. */
1072 Stats::Vector blockedCycles;
1073 /** The number of times this cache blocked for each blocked cause. */
1074 Stats::Vector blockedCauses;
1076 /** The average number of cycles blocked for each blocked cause. */
1077 Stats::Formula avgBlocked;
1079 /** The number of times a HW-prefetched block is evicted w/o
1081 Stats::Scalar unusedPrefetches;
1083 /** Number of blocks written back per thread. */
1084 Stats::Vector writebacks;
1086 /** Demand misses that hit in the MSHRs. */
1087 Stats::Formula demandMshrHits;
1088 /** Total number of misses that hit in the MSHRs. */
1089 Stats::Formula overallMshrHits;
1091 /** Demand misses that miss in the MSHRs. */
1092 Stats::Formula demandMshrMisses;
1093 /** Total number of misses that miss in the MSHRs. */
1094 Stats::Formula overallMshrMisses;
1096 /** Total number of misses that miss in the MSHRs. */
1097 Stats::Formula overallMshrUncacheable;
1099 /** Total cycle latency of demand MSHR misses. */
1100 Stats::Formula demandMshrMissLatency;
1101 /** Total cycle latency of overall MSHR misses. */
1102 Stats::Formula overallMshrMissLatency;
1104 /** Total cycle latency of overall MSHR misses. */
1105 Stats::Formula overallMshrUncacheableLatency;
1107 /** The demand miss rate in the MSHRs. */
1108 Stats::Formula demandMshrMissRate;
1109 /** The overall miss rate in the MSHRs. */
1110 Stats::Formula overallMshrMissRate;
1112 /** The average latency of a demand MSHR miss. */
1113 Stats::Formula demandAvgMshrMissLatency;
1114 /** The average overall latency of an MSHR miss. */
1115 Stats::Formula overallAvgMshrMissLatency;
1117 /** The average overall latency of an MSHR miss. */
1118 Stats::Formula overallAvgMshrUncacheableLatency;
1120 /** Number of replacements of valid blocks. */
1121 Stats::Scalar replacements;
1123 /** Number of data expansions. */
1124 Stats::Scalar dataExpansions;
1127 * Number of data contractions (blocks that had their compression
1130 Stats::Scalar dataContractions;
1132 /** Per-command statistics */
1133 std::vector<std::unique_ptr<CacheCmdStats>> cmd;
1136 /** Registers probes. */
1137 void regProbePoints() override;
/** Construct the base cache from the given parameters and block size. */
1140 BaseCache(const BaseCacheParams &p, unsigned blk_size);
1143 void init() override;
1145 Port &getPort(const std::string &if_name,
1146 PortID idx=InvalidPortID) override;
1149 /** Query block size of a cache.
1150 * @return The block size */
1153 getBlockSize() const
/** @return the address ranges this cache responds to. */
1158 const AddrRangeList &getAddrRanges() const { return addrRanges; }
/**
 * Allocate a miss buffer (MSHR) for @p pkt's block, and block the
 * cache when the MSHR queue becomes full. When @p sched_send is set,
 * a memory-side send event is scheduled at @p time.
 *
 * @param pkt Packet that needs a miss buffer entry.
 * @param time Tick at which the downstream send may be attempted.
 * @param sched_send Whether to schedule the memory-side send event.
 * @return Pointer to the allocated MSHR.
 */
1160 MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1162 MSHR *mshr = mshrQueue.allocate(pkt->getBlockAddr(blkSize), blkSize,
1164 allocOnFill(pkt->cmd));
// The queue is now full: stop accepting requests until an MSHR frees up.
1166 if (mshrQueue.isFull()) {
1167 setBlocked((BlockedCause)MSHRQueue_MSHRs);
1171 // schedule the send
1172 schedMemSideSendEvent(time);
/**
 * Allocate a write-buffer entry for a write or CleanEvict packet,
 * blocking the cache if the write buffer becomes full, and schedule
 * the memory-side port to send at @p time.
 *
 * @param pkt Write or CleanEvict packet to buffer.
 * @param time Tick at which the downstream send may be attempted.
 */
1178 void allocateWriteBuffer(PacketPtr pkt, Tick time)
1180 // should only see writes or clean evicts here
1181 assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);
1183 Addr blk_addr = pkt->getBlockAddr(blkSize);
1185 // If using compression, on evictions the block is decompressed and
1186 // the operation's latency is added to the payload delay. Consume
1187 // that payload delay here, meaning that the data is always stored
1188 // uncompressed in the writebuffer
1190 time += pkt->payloadDelay;
1191 pkt->payloadDelay = 0;
// Look for an existing not-yet-in-service entry for the same block
// that this writeback could merge with.
1194 WriteQueueEntry *wq_entry =
1195 writeBuffer.findMatch(blk_addr, pkt->isSecure());
1196 if (wq_entry && !wq_entry->inService) {
1197 DPRINTF(Cache, "Potential to merge writeback %s", pkt->print());
1200 writeBuffer.allocate(blk_addr, blkSize, pkt, time, order++);
// Entry consumed the last free slot: block new accesses.
1202 if (writeBuffer.isFull()) {
1203 setBlocked((BlockedCause)MSHRQueue_WriteBuffer);
1206 // schedule the send
1207 schedMemSideSendEvent(time);
1211 /** Returns true if the cache is blocked for accesses. */
1213 bool isBlocked() const
1215 return blocked != 0;
1219 /** Marks the access path of the cache as blocked for the given cause. This
1220 * also sets the blocked flag in the response interface.
1221 * @param cause The reason for the cache blocking. */
1223 void setBlocked(BlockedCause cause)
1225 uint8_t flag = 1 << cause;
1227 stats.blockedCauses[cause]++;
// Remember when blocking started; clearBlocked() uses this to
// accumulate blockedCycles for this cause.
1228 blockedCycle = curCycle();
1229 cpuSidePort.setBlocked();
1232 DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
1236 /** Marks the cache as unblocked for the given cause. This also clears the
1237 * blocked flags in the appropriate interfaces.
1238 * @param cause The newly unblocked cause.
1239 * @warning Calling this function can cause a blocked request on the bus to
1240 * access the cache. The cache must be in a state to handle that request. */
1242 void clearBlocked(BlockedCause cause)
1244 uint8_t flag = 1 << cause;
1246 DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
1248 stats.blockedCycles[cause] += curCycle() - blockedCycle;
1249 cpuSidePort.clearBlocked();
1254 /** Schedule a send event for the memory-side port. If already
1255 * scheduled, this may reschedule the event at an earlier
1256 * time. When the specified time is reached, the port is free to
1257 * send either a response, a request, or a prefetch request.
1259 * @param time The time when to attempt sending a packet. */
1261 void schedMemSideSendEvent(Tick time)
1263 memSidePort.schedSendEvent(time);
/** @return true if a block for @p addr (in the given secure space) is in the tags. */
1266 bool inCache(Addr addr, bool is_secure) const {
1267 return tags->findBlock(addr, is_secure);
/** @return whether the block for @p addr was brought in by a prefetch.
 * NOTE(review): the null check of the looked-up block is not visible
 * in this view — confirm before relying on it. */
1270 bool hasBeenPrefetched(Addr addr, bool is_secure) const {
1271 CacheBlk *block = tags->findBlock(addr, is_secure);
1273 return block->wasPrefetched();
/** @return true if an MSHR matching @p addr exists in the miss queue. */
1279 bool inMissQueue(Addr addr, bool is_secure) const {
1280 return mshrQueue.findMatch(addr, is_secure);
/** Update the per-command/per-requestor miss statistics for @p pkt and
 * bump the packet's recorded access depth. */
1283 void incMissCount(PacketPtr pkt)
1285 assert(pkt->req->requestorId() < system->maxRequestors());
1286 stats.cmdStats(pkt).misses[pkt->req->requestorId()]++;
1287 pkt->req->incAccessDepth();
// NOTE(review): presumably guarded by a configured maximum miss
// count; the guard condition is not visible in this view — confirm.
1291 exitSimLoop("A cache reached the maximum miss count");
/** Update the per-command/per-requestor hit statistics for @p pkt. */
1294 void incHitCount(PacketPtr pkt)
1296 assert(pkt->req->requestorId() < system->maxRequestors());
1297 stats.cmdStats(pkt).hits[pkt->req->requestorId()]++;
1301 /** Checks if the cache is coalescing writes.
1303 * @return True if the cache is coalescing writes */
1305 bool coalesce() const;
1309 /** Cache block visitor that writes back dirty cache blocks using
1310 * functional writes. */
1312 void writebackVisitor(CacheBlk &blk);
1315 /** Cache block visitor that invalidates all blocks in the cache.
1317 * @warn Dirty cache lines will not be written back to memory. */
1319 void invalidateVisitor(CacheBlk &blk);
1322 /** Take an MSHR, turn it into a suitable downstream packet, and
1323 * send it out. This construct allows a queue entry to choose a suitable
1324 * approach based on its type.
1326 * @param mshr The MSHR to turn into a packet and send
1327 * @return True if the port is waiting for a retry */
1329 virtual bool sendMSHRQueuePacket(MSHR* mshr);
1332 /** Similar to sendMSHR, but for a write-queue entry
1333 * instead. Create the packet, and send it, and if successful also
1334 * mark the entry in service.
1336 * @param wq_entry The write-queue entry to turn into a packet and send
1337 * @return True if the port is waiting for a retry */
1339 bool sendWriteQueuePacket(WriteQueueEntry* wq_entry);
1342 /** Serialize the state of the caches.
1344 * We currently don't support checkpointing cache state, so this panics. */
1346 void serialize(CheckpointOut &cp) const override;
1347 void unserialize(CheckpointIn &cp) override;
1351 /** The write allocator inspects write packets and detects streaming
1352 * patterns. The write allocator supports a single stream where writes
1353 * are expected to access consecutive locations and keeps track of
1354 * size of the area covered by the consecutive writes in byteCount.
1356 * 1) When byteCount has surpassed the coalesceLimit the mode
1357 * switches from ALLOCATE to COALESCE where writes should be delayed
1358 * until the whole block is written at which point a single packet
1359 * (whole line write) can service them.
1361 * 2) When byteCount has also exceeded the noAllocateLimit (whole
1362 * line) we switch to NO_ALLOCATE when writes should not allocate in
1363 * the cache but rather send a whole line write to the memory below. */
1365 class WriteAllocator : public SimObject {
/** Limits are configured in units of blocks and scaled to bytes here. */
1367 WriteAllocator(const WriteAllocatorParams &p) :
1369 coalesceLimit(p.coalesce_limit * p.block_size),
1370 noAllocateLimit(p.no_allocate_limit * p.block_size),
1371 delayThreshold(p.delay_threshold)
1377 /** Should writes be coalesced? This is true if the mode is set to
1380 * @return return true if the cache should coalesce writes. */
1382 bool coalesce() const {
1383 return mode != WriteMode::ALLOCATE;
1387 /** Should writes allocate?
1389 * @return return true if the cache should not allocate for writes. */
1391 bool allocate() const {
1392 return mode != WriteMode::NO_ALLOCATE;
1396 /** Reset the write allocator state, meaning that it allocates for
1397 * writes and has not recorded any information about qualifying
1398 * writes that might trigger a switch to coalescing and later no
 * allocation. */
1402 mode = WriteMode::ALLOCATE;
1408 /** Access whether we need to delay the current write.
1410 * @param blk_addr The block address the packet writes to
1411 * @return true if the current packet should be delayed */
1413 bool delay(Addr blk_addr) {
1414 if (delayCtr[blk_addr] > 0) {
1415 --delayCtr[blk_addr];
1423 /** Clear delay counter for the input block.
1425 * @param blk_addr The accessed cache block */
1427 void resetDelay(Addr blk_addr) {
1428 delayCtr.erase(blk_addr);
1432 /** Update the write mode based on the current write
1433 * packet. This method compares the packet's address with any
1434 * current stream, and updates the tracking and the mode
1437 * @param write_addr Start address of the write request
1438 * @param write_size Size of the write request
1439 * @param blk_addr The block address that this packet writes to */
1441 void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
1445 /** The current mode for write coalescing and allocation, either
1446 * normal operation (ALLOCATE), write coalescing (COALESCE), or
1447 * write coalescing without allocation (NO_ALLOCATE). */
1449 enum class WriteMode : char {
1456 /** Address to match writes against to detect streams. */
1460 /** Bytes written contiguously; saturates once the current write
 * stream is no longer matched. */
1466 /** Limits for when to switch between the different write modes. */
1468 const uint32_t coalesceLimit;
1469 const uint32_t noAllocateLimit;
1471 /** The number of times the allocator will delay a WriteReq MSHR. */
1473 const uint32_t delayThreshold;
1476 /** Keep track of the number of times the allocator has delayed an
 * allocation, per block address. */
1479 std::unordered_map<Addr, Counter> delayCtr;
1482 #endif //__MEM_CACHE_BASE_HH__