src/mem/cache/base.hh

   1 /*
   2  * Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited
   3  * All rights reserved.
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2003-2005 The Regents of The University of Michigan
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Erik Hallnor
  41  *          Steve Reinhardt
  42  *          Ron Dreslinski
  43  *          Andreas Hansson
  44  *          Nikos Nikoleris
  45  */
  46
  47 /**
  48  * @file
  49  * Declares a basic cache interface BaseCache.
  50  */
  51
  52 #ifndef __MEM_CACHE_BASE_HH__
  53 #define __MEM_CACHE_BASE_HH__
  54
  55 #include <cassert>
  56 #include <cstdint>
  57 #include <string>
  58
  59 #include "base/addr_range.hh"
  60 #include "base/statistics.hh"
  61 #include "base/trace.hh"
  62 #include "base/types.hh"
  63 #include "debug/Cache.hh"
  64 #include "debug/CachePort.hh"
  65 #include "enums/Clusivity.hh"
  66 #include "mem/cache/cache_blk.hh"
  67 #include "mem/cache/mshr_queue.hh"
  68 #include "mem/cache/tags/base.hh"
  69 #include "mem/cache/write_queue.hh"
  70 #include "mem/cache/write_queue_entry.hh"
  71 #include "mem/mem_object.hh"
  72 #include "mem/packet.hh"
  73 #include "mem/packet_queue.hh"
  74 #include "mem/qport.hh"
  75 #include "mem/request.hh"
  76 #include "params/WriteAllocator.hh"
  77 #include "sim/eventq.hh"
  78 #include "sim/serialize.hh"
  79 #include "sim/sim_exit.hh"
  80 #include "sim/system.hh"
  81
  82 class BaseMasterPort;
  83 class BasePrefetcher;
  84 class BaseSlavePort;
  85 class MSHR;
  86 class MasterPort;
  87 class QueueEntry;
  88 struct BaseCacheParams;
  89
  90 /**
  91  * A basic cache interface. Implements some common functions for speed.
  92  */
  93 class BaseCache : public MemObject
  94 {
  95   protected:
  96     /**
  97      * Indexes to enumerate the MSHR queues.
  98      */
  99     enum MSHRQueueIndex {
 100         MSHRQueue_MSHRs,       // implicit value 0; see Blocked_NoMSHRs
 101         MSHRQueue_WriteBuffer  // implicit value 1; see Blocked_NoWBBuffers
 102     };
 103
 104   public:
 105     /**
 106      * Reasons for caches to be blocked.
 107      */
 108     enum BlockedCause {
 109         Blocked_NoMSHRs = MSHRQueue_MSHRs, // same value as the queue index
 110         Blocked_NoWBBuffers = MSHRQueue_WriteBuffer, // likewise
 111         Blocked_NoTargets, // one past Blocked_NoWBBuffers
 112         NUM_BLOCKED_CAUSES // last enumerator: number of causes above
 113     };
 114
 115   protected:
 116
 117     /**
 118      * A cache master port is used for the memory-side port of the
 119      * cache, and in addition to the basic timing port that only sends
 120      * response packets through a transmit list, it also offers the
 121      * ability to schedule and send request packets (requests &
 122      * writebacks). The send event is scheduled through schedSendEvent,
 123      * and the sendDeferredPacket of the timing port is modified to
 124      * consider both the transmit list and the requests from the MSHR.
 125      */
 126     class CacheMasterPort : public QueuedMasterPort
 127     {
 128
 129       public:
 130
 131         /**
 132          * Ask the request queue to schedule a send event at the given
 133          * tick. Note that a retry could already be outstanding.
 134          */
 135         void schedSendEvent(Tick time)
 136         {
 137             DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
 138             reqQueue.schedSendEvent(time);
 139         }
 140
 141       protected:
 142         /** Passes all four arguments on to the QueuedMasterPort base. */
 143         CacheMasterPort(const std::string &_name, BaseCache *_cache,
 144                         ReqPacketQueue &_reqQueue,
 145                         SnoopRespPacketQueue &_snoopRespQueue) :
 146             QueuedMasterPort(_name, _cache, _reqQueue, _snoopRespQueue)
 147         { }
 148
 149         /**
 150          * Memory-side port always snoops. Declared override, like the
 151          * CpuSidePort hooks, so a signature mismatch fails to compile.
 152          * @return always true
 153          */
 154         virtual bool isSnooping() const override { return true; }
 155     };
 156
 157     /**
 158      * Override the default behaviour of sendDeferredPacket to enable
 159      * the memory-side cache port to also send requests based on the
 160      * current MSHR status. This queue has a pointer to our specific
 161      * cache implementation and is used by the MemSidePort.
 162      */
 163     class CacheReqPacketQueue : public ReqPacketQueue
 164     {
 165
 166       protected:
 167
 168         BaseCache &cache;                     // cache passed to the ctor
 169         SnoopRespPacketQueue &snoopRespQueue; // probed for conflicts
 170
 171       public:
 172         /** Binds both references; the rest goes to ReqPacketQueue. */
 173         CacheReqPacketQueue(BaseCache &cache, MasterPort &port,
 174                             SnoopRespPacketQueue &snoop_resp_queue,
 175                             const std::string &label) :
 176             ReqPacketQueue(cache, port, label), cache(cache),
 177             snoopRespQueue(snoop_resp_queue) { }
 178
 179         /**
 180          * Replaces the normal sendDeferredPacket so that requests are
 181          * considered in addition to the transmit list (used for
 182          * responses). Declared override to match CpuSidePort's hooks.
 183          */
 184         virtual void sendDeferredPacket() override;
 185
 186         /**
 187          * Check if there is a conflicting snoop response about to be
 188          * sent out, and if so simply stall any requests, and schedule
 189          * a send event at the same time as the next snoop response is
 190          * being sent out. @return true if a conflict was found.
 191          */
 192         bool checkConflictingSnoop(Addr addr)
 193         {
 194             if (!snoopRespQueue.hasAddr(addr)) {
 195                 return false; // nothing queued for this address
 196             }
 197             DPRINTF(CachePort, "Waiting for snoop response to be "
 198                     "sent\n");
 199             // Stall: retry when the next snoop response is ready.
 200             schedSendEvent(snoopRespQueue.deferredPacketReadyTime());
 201             return true;
 202         }
 203     };
 204
 205
 206     /**
 207      * The memory-side port extends the base cache master port with
 208      * access functions for functional, atomic and timing snoops.
 209      */
 210     class MemSidePort : public CacheMasterPort
 211     {
 212       private:
 213
 214         /** The cache-specific queue. */
 215         CacheReqPacketQueue _reqQueue;
 216         /** Snoop response queue owned by this port. */
 217         SnoopRespPacketQueue _snoopRespQueue;
 218
 219         // a pointer to our specific cache implementation
 220         BaseCache *cache;
 221
 222       protected:
 223         // Receive hooks; declared override like CpuSidePort's hooks.
 224         virtual void recvTimingSnoopReq(PacketPtr pkt) override;
 225
 226         virtual bool recvTimingResp(PacketPtr pkt) override;
 227
 228         virtual Tick recvAtomicSnoop(PacketPtr pkt) override;
 229
 230         virtual void recvFunctionalSnoop(PacketPtr pkt) override;
 231
 232       public:
 233
 234         MemSidePort(const std::string &_name, BaseCache *_cache,
 235                     const std::string &_label);
 236     };
 237
 238     /**
 239      * A cache slave port is used for the CPU-side port of the cache,
 240      * and it is basically a simple timing port that uses a transmit
 241      * list for responses to the CPU (or connected master). In
 242      * addition, it has the functionality to block the port for
 243      * incoming requests. If blocked, the port will issue a retry once
 244      * unblocked.
 245      */
 246     class CacheSlavePort : public QueuedSlavePort
 247     {
 248
 249       public:
 250
 251         /** Do not accept any new requests. */
 252         void setBlocked();
 253
 254         /** Return to normal operation and accept new requests. */
 255         void clearBlocked();
 256         /** @return the current value of the blocked flag. */
 257         bool isBlocked() const { return blocked; }
 258
 259       protected:
 260         /** Protected constructor (body is defined out of line). */
 261         CacheSlavePort(const std::string &_name, BaseCache *_cache,
 262                        const std::string &_label);
 263
 264         /** A normal packet queue used to store responses. */
 265         RespPacketQueue queue;
 266         /** Flag returned by isBlocked(). */
 267         bool blocked;
 268         /** NOTE(review): presumably a retry is owed when set; confirm. */
 269         bool mustSendRetry;
 270
 271       private:
 272         /** NOTE(review): presumably run by sendRetryEvent; confirm. */
 273         void processSendRetry();
 274         /** Event member; its use is not visible in this header. */
 275         EventFunctionWrapper sendRetryEvent;
 276
 277     };
 278
 279     /**
 280      * The CPU-side port extends the base cache slave port with access
 281      * functions for functional, atomic and timing requests.
 282      */
 283     class CpuSidePort : public CacheSlavePort
 284     {
 285       private:
 286
 287         // a pointer to our specific cache implementation
 288         BaseCache *cache;
 289
 290       protected: // every hook below is declared override
 291         virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
 292         /** Timing-mode hook; semantics are defined by the base class. */
 293         virtual bool tryTiming(PacketPtr pkt) override;
 294         /** Timing-mode request hook. */
 295         virtual bool recvTimingReq(PacketPtr pkt) override;
 296         /** Atomic-mode request hook; returns a Tick value. */
 297         virtual Tick recvAtomic(PacketPtr pkt) override;
 298         /** Functional-mode request hook. */
 299         virtual void recvFunctional(PacketPtr pkt) override;
 300         /** Returns an AddrRangeList; the body is defined out of line. */
 301         virtual AddrRangeList getAddrRanges() const override;
 302
 303       public:
 304         /** Public constructor (body is defined out of line). */
 305         CpuSidePort(const std::string &_name, BaseCache *_cache,
 306                     const std::string &_label);
 307
 308     };
 309
 310     CpuSidePort cpuSidePort;
 311     MemSidePort memSidePort;
 312
 313   protected:
 314
 315     /** Miss status registers */
 316     MSHRQueue mshrQueue;
 317
 318     /** Write/writeback buffer */
 319     WriteQueue writeBuffer;
 320
 321     /** Tag and data Storage */
 322     BaseTags *tags;
 323
 324     /** Prefetcher */
 325     BasePrefetcher *prefetcher;
 326
 327     /**
 328      * Notify the prefetcher on every access, not just misses.
 329      */
 330     const bool prefetchOnAccess;
 331
 332     /**
 333      * The writeAllocator drives optimizations for streaming writes.
 334      * It first determines whether a WriteReq MSHR should be delayed,
 335      * thus ensuring that we wait longer in cases when we are write
 336      * coalescing and allowing all the bytes of the line to be written
 337      * before the MSHR packet is sent downstream. This works in unison
 338      * with the tracking in the MSHR to check if the entire line is
 339      * written. The write mode also affects the behaviour on filling
 340      * any whole-line writes. Normally the cache allocates the line
 341      * when receiving the InvalidateResp, but after seeing enough
 342      * consecutive lines we switch to using the tempBlock, and thus
 343      * end up not allocating the line, and instead turning the
 344      * whole-line write into a writeback straight away.
 345      */
 346     WriteAllocator * const writeAllocator;
 347
 348     /**
 349      * Temporary cache block for occasional transitory use.  We use
 350      * the tempBlock to fill when allocation fails (e.g., when there
 351      * is an outstanding request that accesses the victim block) or
 352      * when we want to avoid allocation (e.g., exclusive caches)
 353      */
 354     TempCacheBlk *tempBlock;
 355
 356     /**
 357      * Upstream caches need this packet until true is returned, so
 358      * hold it for deletion until a subsequent call
 359      */
 360     std::unique_ptr<Packet> pendingDelete;
 361
 362     /**
 363      * Mark a request as in service (sent downstream in the memory
 364      * system), effectively making this MSHR the ordering point.
 365      */
 366     void markInService(MSHR *mshr, bool pending_modified_resp)
 367     {
 368         const bool full_before = mshrQueue.isFull();
 369         mshrQueue.markInService(mshr, pending_modified_resp);
 370         // Nothing to unblock unless the queue went full -> not full.
 371         if (!full_before || mshrQueue.isFull())
 372             return;
 373         clearBlocked(Blocked_NoMSHRs);
 374     }
 375
 376     void markInService(WriteQueueEntry *entry)
 377     {
 378         const bool full_before = writeBuffer.isFull();
 379         writeBuffer.markInService(entry);
 380         // Nothing to unblock unless the buffer went full -> not full.
 381         if (!full_before || writeBuffer.isFull())
 382             return;
 383         clearBlocked(Blocked_NoWBBuffers);
 384     }
 385
 386     /**
 387      * Determine whether we should allocate on a fill or not. If this
 388      * cache is mostly inclusive with regards to the upstream cache(s)
 389      * we always allocate (for any non-forwarded and cacheable
 390      * requests). In the case of a mostly exclusive cache, we allocate
 391      * on fill if the packet did not come from a cache, thus if we:
 392      * are dealing with a whole-line write (the latter behaves much
 393      * like a writeback), the original target packet came from a
 394      * non-caching source, or if we are performing a prefetch or LLSC.
 395      *
 396      * @param cmd Command of the incoming requesting packet
 397      * @return Whether we should allocate on the fill
 398      */
 399     inline bool allocOnFill(MemCmd cmd) const
 400     {
 401         // A mostly-inclusive cache allocates for every command.
 402         if (clusivity == Enums::mostly_incl) return true;
 403         // Otherwise only the command kinds tested below allocate.
 404         const bool is_listed_req = cmd == MemCmd::WriteLineReq ||
 405             cmd == MemCmd::ReadReq || cmd == MemCmd::WriteReq;
 406         return is_listed_req || cmd.isPrefetch() || cmd.isLLSC();
 407     }
 408
 409     /**
 410      * Regenerate block address using tags.
 411      * Block address regeneration depends on whether we're using a temporary
 412      * block or not.
 413      *
 414      * @param blk The block to regenerate address.
 415      * @return The block's address.
 416      */
 417     Addr regenerateBlkAddr(CacheBlk* blk);
 418
 419     /**
 420      * Does all the processing necessary to perform the provided request.
 421      * @param pkt The memory request to perform.
 422      * @param blk The cache block to be updated.
 423      * @param lat The latency of the access.
 424      * @param writebacks List for any writebacks that need to be performed.
 425      * @return Boolean indicating whether the request was satisfied.
 426      */
 427     virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
 428                         PacketList &writebacks);
 429
 430     /**
 431      * Handle a timing request that hit in the cache
 432      *
 433      * @param pkt The request packet
 434      * @param blk The referenced block
 435      * @param request_time The tick at which the block lookup is complete
 436      */
 437     virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
 438                                     Tick request_time);
 439
 440     /**
 441      * Handle a timing request that missed in the cache
 442      *
 443      * Implementation specific handling for different cache
 444      * implementations
 445      *
 446      * @param pkt The request packet
 447      * @param blk The referenced block
 448      * @param forward_time The tick at which we can process dependent requests
 449      * @param request_time The tick at which the block lookup is complete
 450      */
 451     virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
 452                                      Tick forward_time,
 453                                      Tick request_time) = 0;
 454
 455     /**
 456      * Handle a timing request that missed in the cache
 457      *
 458      * Common functionality across different cache implementations
 459      *
 460      * @param pkt The request packet
 461      * @param blk The referenced block
 462      * @param mshr Any existing mshr for the referenced cache block
 463      * @param forward_time The tick at which we can process dependent requests
 464      * @param request_time The tick at which the block lookup is complete
 465      */
 466     void handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
 467                              Tick forward_time, Tick request_time);
 468
 469     /**
 470      * Performs the access specified by the request.
 471      * @param pkt The request to perform.
 472      */
 473     virtual void recvTimingReq(PacketPtr pkt);
 474
 475     /**
 476      * Handling the special case of uncacheable write responses to
 477      * make recvTimingResp less cluttered.
 478      */
 479     void handleUncacheableWriteResp(PacketPtr pkt);
 480
 481     /**
 482      * Service non-deferred MSHR targets using the received response
 483      *
 484      * Iterates through the list of targets that can be serviced with
 485      * the current response. Any writebacks that need to be performed
 486      * must be appended to the writebacks parameter.
 487      *
 488      * @param mshr The MSHR that corresponds to the response
 489      * @param pkt The response packet
 490      * @param blk The reference block
 491      * @param writebacks List of writebacks that need to be performed
 492      */
 493     virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
 494                                     CacheBlk *blk, PacketList& writebacks) = 0;
 495
 496     /**
 497      * Handles a response (cache line fill/write ack) from the bus.
 498      * @param pkt The response packet
 499      */
 500     virtual void recvTimingResp(PacketPtr pkt);
 501
 502     /**
 503      * Snoops bus transactions to maintain coherence.
 504      * @param pkt The current bus transaction.
 505      */
 506     virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
 507
 508     /**
 509      * Handle a snoop response.
 510      * @param pkt Snoop response packet
 511      */
 512     virtual void recvTimingSnoopResp(PacketPtr pkt) = 0;
 513
 514     /**
 515      * Handle a request in atomic mode that missed in this cache
 516      *
 517      * Creates a downstream request, sends it to the memory below and
 518      * handles the response. As we are in atomic mode all operations
 519      * are performed immediately.
 520      *
 521      * @param pkt The packet with the requests
 522      * @param blk The referenced block
 523      * @param writebacks A list with packets for any performed writebacks
 524      * @return Cycles for handling the request
 525      */
 526     virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
 527                                        PacketList &writebacks) = 0;
 528
 529     /**
 530      * Performs the access specified by the request.
 531      * @param pkt The request to perform.
 532      * @return The number of ticks required for the access.
 533      */
 534     virtual Tick recvAtomic(PacketPtr pkt);
 535
 536     /**
 537      * Snoop for the provided request in the cache and return the estimated
 538      * time taken.
 539      * @param pkt The memory request to snoop
 540      * @return The number of ticks required for the snoop.
 541      */
 542     virtual Tick recvAtomicSnoop(PacketPtr pkt) = 0;
 543
 544     /**
 545      * Performs the access specified by the request.
 546      *
 547      * @param pkt The request to perform.
 548      * @param from_cpu_side from the CPU side port or the memory side port
 549      */
 550     virtual void functionalAccess(PacketPtr pkt, bool from_cpu_side);
 551
 552     /**
 553      * Handle doing the Compare and Swap function for SPARC.
 554      */
 555     void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
 556
 557     /**
 558      * Return the next queue entry to service, either a pending miss
 559      * from the MSHR queue, a buffered write from the write buffer, or
 560      * something from the prefetcher. This function is responsible
 561      * for prioritizing among those sources on the fly.
 562      */
 563     QueueEntry* getNextQueueEntry();
 564
 565     /**
 566      * Insert writebacks into the write buffer
 567      */
 568     virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
 569
 570     /**
 571      * Send writebacks down the memory hierarchy in atomic mode
 572      */
 573     virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
 574
 575     /**
 576      * Create an appropriate downstream bus request packet.
 577      *
 578      * Creates a new packet with the request to be sent to the memory
 579      * below, or nullptr if the current request in cpu_pkt should just
 580      * be forwarded on.
 581      *
 582      * @param cpu_pkt The miss packet that needs to be satisfied.
 583      * @param blk The referenced block, can be nullptr.
 584      * @param needs_writable Indicates that the block must be writable
 585      * even if the request in cpu_pkt doesn't indicate that.
 586      * @param is_whole_line_write True if there are writes for the
 587      * whole line
 588      * @return A packet to be sent to the memory below
 589      */
 590     virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
 591                                        bool needs_writable,
 592                                        bool is_whole_line_write) const = 0;
 593
 594     /**
 595      * Determine if clean lines should be written back or not. In
 596      * cases where a downstream cache is mostly inclusive we likely
 597      * want it to act as a victim cache also for lines that have not
 598      * been modified. Hence, we cannot simply drop the line (or send a
 599      * clean evict), but rather need to send the actual data.
 600      */
 601     const bool writebackClean;
 602
 603     /**
 604      * Writebacks from the tempBlock, resulting on the response path
 605      * in atomic mode, must happen after the call to recvAtomic has
 606      * finished (for the right ordering of the packets). We therefore
 607      * need to hold on to the packets, and have a method and an event
 608      * to send them.
 609      */
 610     PacketPtr tempBlockWriteback;
 611
 612     /**
 613      * Send the outstanding tempBlock writeback. To be called after
 614      * recvAtomic finishes in cases where the block we filled is in
 615      * fact the tempBlock, and now needs to be written back.
 616      */
 617     void writebackTempBlockAtomic() {
 618         assert(tempBlockWriteback != nullptr);
 619         PacketList pending(1, tempBlockWriteback); // one-element list
 620         doWritebacksAtomic(pending);
 621         tempBlockWriteback = nullptr;              // nothing held now
 622     }
 623
 624     /**
 625      * An event to writeback the tempBlock after recvAtomic
 626      * finishes. To avoid other calls to recvAtomic getting in
 627      * between, we create this event with a higher priority.
 628      */
 629     EventFunctionWrapper writebackTempBlockAtomicEvent;
 630
 631     /**
 632      * Perform any necessary updates to the block and perform any data
 633      * exchange between the packet and the block. The flags of the
 634      * packet are also set accordingly.
 635      *
 636      * @param pkt Request packet from upstream that hit a block
 637      * @param blk Cache block that the packet hit
 638      * @param deferred_response Whether this request originally missed
 639      * @param pending_downgrade Whether the writable flag is to be removed
 640      */
 641     virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
 642                                 bool deferred_response = false,
 643                                 bool pending_downgrade = false);
 644
 645     /**
 646      * Maintain the clusivity of this cache by potentially
 647      * invalidating a block. This method works in conjunction with
 648      * satisfyRequest, but is separate to allow us to handle all MSHR
 649      * targets before potentially dropping a block.
 650      *
 651      * @param from_cache Whether we have dealt with a packet from a cache
 652      * @param blk The block that should potentially be dropped
 653      */
 654     void maintainClusivity(bool from_cache, CacheBlk *blk);
 655
 656     /**
 657      * Handle a fill operation caused by a received packet.
 658      *
 659      * Populates a cache block and handles all outstanding requests for the
 660      * satisfied fill request. This version takes two memory requests. One
 661      * contains the fill data, the other is an optional target to satisfy.
 662      * Note that the reason we return a list of writebacks rather than
 663      * inserting them directly in the write buffer is that this function
 664      * is called by both atomic and timing-mode accesses, and in atomic
 665      * mode we don't mess with the write buffer (we just perform the
 666      * writebacks atomically once the original request is complete).
 667      *
 668      * @param pkt The memory request with the fill data.
 669      * @param blk The cache block if it already exists.
 670      * @param writebacks List for any writebacks that need to be performed.
 671      * @param allocate Whether to allocate a block or use the temp block
 672      * @return Pointer to the new cache block.
 673      */
 674     CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
 675                          PacketList &writebacks, bool allocate);
 676
 677     /**
 678      * Allocate a new block and perform any necessary writebacks
 679      *
 680      * Find a victim block and if necessary prepare writebacks for any
 681      * existing data. May return nullptr if there are no replaceable
 682      * blocks. If a replaceable block is found, it inserts the new block in
 683      * its place. The new block, however, is not set as valid yet.
 684      *
 685      * @param pkt Packet holding the address to update
 686      * @param writebacks A list of writeback packets for the evicted blocks
 687      * @return the allocated block
 688      */
 689     CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
 690     /**
 691      * Evict a cache block.
 692      *
 693      * Performs a writeback if necessary and invalidates the block
 694      *
 695      * @param blk Block to invalidate
 696      * @return A packet with the writeback, can be nullptr
 697      */
 698     M5_NODISCARD virtual PacketPtr evictBlock(CacheBlk *blk) = 0;
 699
 700     /**
 701      * Evict a cache block.
 702      *
 703      * Performs a writeback if necessary and invalidates the block
 704      *
 705      * @param blk Block to invalidate
 706      * @param writebacks Return a list of packets with writebacks
 707      */
 708     void evictBlock(CacheBlk *blk, PacketList &writebacks);
 709
 710     /**
 711      * Invalidate a cache block.
 712      *
 713      * @param blk Block to invalidate
 714      */
 715     void invalidateBlock(CacheBlk *blk);
 716
 717     /**
 718      * Create a writeback request for the given block.
 719      *
 720      * @param blk The block to writeback.
 721      * @return The writeback request for the block.
 722      */
 723     PacketPtr writebackBlk(CacheBlk *blk);
 724
 725     /**
 726      * Create a writeclean request for the given block.
 727      *
 728      * Creates a request that writes the block to the cache below
 729      * without evicting the block from the current cache.
 730      *
 731      * @param blk The block to write clean.
 732      * @param dest The destination of the write clean operation.
 733      * @param id Use the given packet id for the write clean operation.
 734      * @return The generated write clean packet.
 735      */
 736     PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
 737
 738     /**
 739      * Write back dirty blocks in the cache using functional accesses.
 740      */
 741     virtual void memWriteback() override;
 742
 743     /**
 744      * Invalidates all blocks in the cache.
 745      *
 746      * @warning Dirty cache lines will not be written back to
 747      * memory. Make sure to call memWriteback() first if you
 748      * want to write them to memory.
 749      */
 750     virtual void memInvalidate() override;
 751
 752     /**
 753      * Determine if there are any dirty blocks in the cache.
 754      *
 755      * @return true if at least one block is dirty, false otherwise.
 756      */
 757     bool isDirty() const;
 758
 759     /**
 760      * Determine if an address is in the ranges covered by this
 761      * cache. This is useful to filter snoops.
 762      *
 763      * @param addr Address to check against
 764      *
 765      * @return If the address in question is in range
 766      */
 767     bool inRange(Addr addr) const;
 768
 769     /**
 770      * Find next request ready time from among possible sources.
 771      */
 772     Tick nextQueueReadyTime() const;
 773
 774     /** Block size of this cache */
 775     const unsigned blkSize;
 776
 777     /**
 778      * The latency of tag lookup of a cache. It occurs when there is
 779      * an access to the cache.
 780      */
 781     const Cycles lookupLatency;
 782
 783     /**
 784      * The latency of data access of a cache. It occurs when there is
 785      * an access to the cache.
 786      */
 787     const Cycles dataLatency;
 788
 789     /**
 790      * This is the forward latency of the cache. It occurs when there
 791      * is a cache miss and a request is forwarded downstream, in
 792      * particular an outbound miss.
 793      */
 794     const Cycles forwardLatency;
 795
 796     /** The latency to fill a cache block */
 797     const Cycles fillLatency;
 798
 799     /**
 800      * The latency of sending a response to its upper level cache/core on
 801      * a linefill. The responseLatency parameter captures this
 802      * latency.
 803      */
 804     const Cycles responseLatency;
 805
 806     /** The number of targets for each MSHR. */
 807     const int numTarget;
 808
 809     /** Do we forward snoops from mem side port through to cpu side port? */
 810     bool forwardSnoops;
 811
 812     /**
 813      * Clusivity with respect to the upstream cache, determining if we
 814      * fill into both this cache and the cache above on a miss. Note
 815      * that we currently do not support strict clusivity policies.
 816      */
 817     const Enums::Clusivity clusivity;
 818
 819     /**
 820      * Is this cache read only, for example the instruction cache, or
 821      * table-walker cache. A cache that is read only should never see
 822      * any writes, and should never get any dirty data (and hence
 823      * never have to do any writebacks).
 824      */
 825     const bool isReadOnly;
 826
 827     /**
 828      * Bit vector of the blocking reasons for the access path: bit
 829      * (1 << cause) is set while that cause is active. @sa #BlockedCause
 830      */
 831     uint8_t blocked;
 832
 833     /** Increasing order number handed to each MSHR/write-buffer entry. */
 834     uint64_t order;
 835
 836     /** Cycle at which the cache became blocked; used for blocked stats. */
 837     Cycles blockedCycle;
 838
 839     /** Pointer to the MSHR that has no targets. */
 840     MSHR *noTargetMSHR;
 841
 842     /** Remaining misses before an exit event is raised; 0 disables it. */
 843     Counter missCount;
 844
 845     /**
 846      * The address range to which the cache responds on the CPU side.
 847      * Normally this is all possible memory addresses. */
 848     const AddrRangeList addrRanges;
 849
 850   public:
 851     /** System we are currently operating in. */
 852     System *system;
 853
 854     // Statistics
 855     /**
 856      * @addtogroup CacheStatistics
 857      * @{
 858      */
 859
 860     /** Number of hits for each type of command, indexed by master id
 861         (see incHitCount()). @sa Packet::Command */
 862     Stats::Vector hits[MemCmd::NUM_MEM_CMDS];
 863     /** Number of hits for demand accesses. */
 864     Stats::Formula demandHits;
 865     /** Number of hits for all accesses. */
 866     Stats::Formula overallHits;
 867
 868     /** Number of misses for each type of command, indexed by master id
 869         (see incMissCount()). @sa Packet::Command */
 870     Stats::Vector misses[MemCmd::NUM_MEM_CMDS];
 871     /** Number of misses for demand accesses. */
 872     Stats::Formula demandMisses;
 873     /** Number of misses for all accesses. */
 874     Stats::Formula overallMisses;
 875
 876     /**
 877      * Total number of cycles per thread/command spent waiting for a miss.
 878      * Used to calculate the average miss latency.
 879      */
 880     Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS];
 881     /** Total number of cycles spent waiting for demand misses. */
 882     Stats::Formula demandMissLatency;
 883     /** Total number of cycles spent waiting for all misses. */
 884     Stats::Formula overallMissLatency;
 885
 886     /** The number of accesses per command and thread. */
 887     Stats::Formula accesses[MemCmd::NUM_MEM_CMDS];
 888     /** The number of demand accesses. */
 889     Stats::Formula demandAccesses;
 890     /** The number of overall accesses. */
 891     Stats::Formula overallAccesses;
 892
 893     /** The miss rate per command and thread. */
 894     Stats::Formula missRate[MemCmd::NUM_MEM_CMDS];
 895     /** The miss rate of all demand accesses. */
 896     Stats::Formula demandMissRate;
 897     /** The miss rate for all accesses. */
 898     Stats::Formula overallMissRate;
 899
 900     /** The average miss latency per command and thread. */
 901     Stats::Formula avgMissLatency[MemCmd::NUM_MEM_CMDS];
 902     /** The average miss latency for demand misses. */
 903     Stats::Formula demandAvgMissLatency;
 904     /** The average miss latency for all misses. */
 905     Stats::Formula overallAvgMissLatency;
 906
 907     /** The total number of cycles blocked for each blocked cause. */
 908     Stats::Vector blocked_cycles;
 909     /** The number of times this cache blocked for each blocked cause. */
 910     Stats::Vector blocked_causes;
 911
 912     /** The average number of cycles blocked for each blocked cause. */
 913     Stats::Formula avg_blocked;
 914
 915     /** The number of times a HW-prefetched block is evicted w/o reference. */
 916     Stats::Scalar unusedPrefetches;
 917
 918     /** Number of blocks written back per thread. */
 919     Stats::Vector writebacks;
 920
 921     /** Number of misses that hit in the MSHRs per command and thread. */
 922     Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS];
 923     /** Demand misses that hit in the MSHRs. */
 924     Stats::Formula demandMshrHits;
 925     /** Total number of misses that hit in the MSHRs. */
 926     Stats::Formula overallMshrHits;
 927
 928     /** Number of misses that miss in the MSHRs, per command and thread. */
 929     Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS];
 930     /** Demand misses that miss in the MSHRs. */
 931     Stats::Formula demandMshrMisses;
 932     /** Total number of misses that miss in the MSHRs. */
 933     Stats::Formula overallMshrMisses;
 934
 935     /** Number of uncacheable MSHR accesses, per command and thread. */
 936     Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
 937     /** Total number of uncacheable MSHR accesses. */
 938     Stats::Formula overallMshrUncacheable;
 939
 940     /** Total cycle latency of each MSHR miss, per command and thread. */
 941     Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
 942     /** Total cycle latency of demand MSHR misses. */
 943     Stats::Formula demandMshrMissLatency;
 944     /** Total cycle latency of overall MSHR misses. */
 945     Stats::Formula overallMshrMissLatency;
 946
 947     /** Total cycle latency of uncacheable MSHR accesses, per command/thread. */
 948     Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
 949     /** Total cycle latency of all uncacheable MSHR accesses. */
 950     Stats::Formula overallMshrUncacheableLatency;
 951
 952 #if 0
 953     /** The total number of MSHR accesses per command and thread. */
 954     Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
 955     /** The total number of demand MSHR accesses. */
 956     Stats::Formula demandMshrAccesses;
 957     /** The total number of MSHR accesses. */
 958     Stats::Formula overallMshrAccesses;
 959 #endif
 960
 961     /** The miss rate in the MSHRs per command and thread. */
 962     Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
 963     /** The demand miss rate in the MSHRs. */
 964     Stats::Formula demandMshrMissRate;
 965     /** The overall miss rate in the MSHRs. */
 966     Stats::Formula overallMshrMissRate;
 967
 968     /** The average latency of an MSHR miss, per command and thread. */
 969     Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
 970     /** The average latency of a demand MSHR miss. */
 971     Stats::Formula demandAvgMshrMissLatency;
 972     /** The average overall latency of an MSHR miss. */
 973     Stats::Formula overallAvgMshrMissLatency;
 974
 975     /** The average latency of an uncacheable MSHR access, per command/thread. */
 976     Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
 977     /** The average overall latency of an uncacheable MSHR access. */
 978     Stats::Formula overallAvgMshrUncacheableLatency;
 979
 980     /** Number of replacements of valid blocks. */
 981     Stats::Scalar replacements;
 982
 983     /**
 984      * @}
 985      */
 986
 987     /**
 988      * Register stats for this object (overrides the base-class hook).
 989      */
 990     void regStats() override;
 991
 992   public:
 993     BaseCache(const BaseCacheParams *p, unsigned blk_size);
 994     ~BaseCache();
 995
 996     void init() override;
 997
 998     BaseMasterPort &getMasterPort(const std::string &if_name,
 999                                   PortID idx = InvalidPortID) override;
1000     BaseSlavePort &getSlavePort(const std::string &if_name,
1001                                 PortID idx = InvalidPortID) override;
1002
1003     /**
1004      * Accessor for the block size this cache operates on.
1005      * @return  The value of blkSize
1006      */
1007     unsigned
1008     getBlockSize() const
1009     {
1010         return this->blkSize;
1011     }
1012
1013     const AddrRangeList &getAddrRanges() const { return this->addrRanges; }
1014
1015     MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1016     {
1017         // Reserve an MSHR for the block-aligned address of this packet.
1018         const Addr blk_addr = pkt->getBlockAddr(blkSize);
1019         MSHR *const entry = mshrQueue.allocate(blk_addr, blkSize, pkt, time,
1020                                                order++, allocOnFill(pkt->cmd));
1021
1022         // Block the access path once the MSHR queue reports full.
1023         if (mshrQueue.isFull())
1024             setBlocked(static_cast<BlockedCause>(MSHRQueue_MSHRs));
1025
1026         // Callers can suppress the send by passing sched_send = false.
1027         if (sched_send)
1028             schedMemSideSendEvent(time);
1029
1030         return entry;
1031     }
1032
1033     void allocateWriteBuffer(PacketPtr pkt, Tick time)
1034     {
1035         // should only see writes or clean evicts here
1036         assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);
1037
1038         const Addr blk_addr = pkt->getBlockAddr(blkSize);
1039         // Informational only: a matching entry that is not yet in service
1040         // is merely reported; a fresh entry is allocated below regardless.
1041         WriteQueueEntry *wq_entry =
1042             writeBuffer.findMatch(blk_addr, pkt->isSecure());
1043         if (wq_entry && !wq_entry->inService) {
1044             DPRINTF(Cache, "Potential to merge writeback %s\n",
1045                     pkt->print());
1046         }
1047
1048         writeBuffer.allocate(blk_addr, blkSize, pkt, time, order++);
1049         if (writeBuffer.isFull())
1050             setBlocked((BlockedCause)MSHRQueue_WriteBuffer);
1051
1052         // schedule the send
1053         schedMemSideSendEvent(time);
1054     }
1055
1056     /**
1057      * @return true while at least one bit of the blocked mask is set.
1058      */
1059     bool isBlocked() const
1060     {
1061         return static_cast<bool>(blocked);
1062     }
1063
1064     /**
1065      * Record the given cause in the blocked mask. On the transition from
1066      * unblocked to blocked the slave (CPU-side) port is blocked as well.
1067      * @param cause The reason for the cache blocking.
1068      */
1069     void setBlocked(BlockedCause cause)
1070     {
1071         if (!blocked) {
1072             // first active cause: count it, timestamp it, block the port
1073             blocked_causes[cause]++;
1074             blockedCycle = curCycle();
1075             cpuSidePort.setBlocked();
1076         }
1077         blocked |= static_cast<uint8_t>(1 << cause);
1078         DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
1079     }
1080
1081     /**
1082      * Drop the given cause from the blocked mask. When no cause remains the
1083      * blocked time is accounted to this cause and the CPU-side port unblocks.
1084      * @param cause The newly unblocked cause.
1085      * @warning Calling this function can cause a blocked request on the bus to
1086      * access the cache. The cache must be in a state to handle that request.
1087      */
1088     void clearBlocked(BlockedCause cause)
1089     {
1090         const uint8_t cause_bit = 1 << cause;
1091         blocked &= ~cause_bit;
1092         DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
1093         if (blocked != 0)
1094             return; // other causes are still outstanding
1095         blocked_cycles[cause] += curCycle() - blockedCycle;
1096         cpuSidePort.clearBlocked();
1097     }
1098
1099     /**
1100      * Request a send event on the memory-side port at the given tick.
1101      * An event that is already scheduled may be moved to an earlier
1102      * time. Once the time is reached the port may send a response, a
1103      * request, or a prefetch request.
1104      *
1105      * @param time The time when to attempt sending a packet.
1106      */
1107     void schedMemSideSendEvent(Tick time)
1108     {
1109         this->memSidePort.schedSendEvent(time);
1110     }
1111
1112     bool inCache(Addr addr, bool is_secure) const {
1113         return static_cast<bool>(tags->findBlock(addr, is_secure));
1114     }
1115
1116     bool inMissQueue(Addr addr, bool is_secure) const {
1117         return static_cast<bool>(mshrQueue.findMatch(addr, is_secure));
1118     }
1119
1120     void incMissCount(PacketPtr pkt)
1121     {
1122         assert(pkt->req->masterId() < system->maxMasters());
1123         misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
1124         pkt->req->incAccessDepth();
1125         // a zero missCount means no exit-on-miss countdown is armed
1126         if (missCount == 0)
1127             return;
1128         if (--missCount == 0)
1129             exitSimLoop("A cache reached the maximum miss count");
1130     }
1131     void incHitCount(PacketPtr pkt)
1132     {
1133         assert(pkt->req->masterId() < system->maxMasters());
1134         Stats::Vector &per_master = hits[pkt->cmdToIndex()];
1135         per_master[pkt->req->masterId()]++;
1136     }
1137
1138     /**
1139      * Cache block visitor that writes back dirty cache blocks using
1140      * functional writes.
1141      */
1142     void writebackVisitor(CacheBlk &blk);
1143
1144     /**
1145      * Cache block visitor that invalidates all blocks in the cache.
1146      *
1147      * @warning Dirty cache lines will not be written back to memory.
1148      */
1149     void invalidateVisitor(CacheBlk &blk);
1150
1151     /**
1152      * Take an MSHR, turn it into a suitable downstream packet, and
1153      * send it out. This construct allows a queue entry to choose a suitable
1154      * approach based on its type.
1155      *
1156      * @param mshr The MSHR to turn into a packet and send
1157      * @return True if the port is waiting for a retry
1158      */
1159     virtual bool sendMSHRQueuePacket(MSHR* mshr);
1160
1161     /**
1162      * Similar to sendMSHRQueuePacket, but for a write-queue entry
1163      * instead. Create the packet, and send it, and if successful also
1164      * mark the entry in service.
1165      *
1166      * @param wq_entry The write-queue entry to turn into a packet and send
1167      * @return True if the port is waiting for a retry
1168      */
1169     bool sendWriteQueuePacket(WriteQueueEntry* wq_entry);
1170
1171     /**
1172      * Serialize the state of the caches
1173      *
1174      * We currently don't support checkpointing cache state, so this panics.
1175      */
1176     void serialize(CheckpointOut &cp) const override;
1177     void unserialize(CheckpointIn &cp) override;
1178
1179 };
1180
1181 /**
1182  * The write allocator inspects write packets and detects streaming
1183  * patterns. The write allocator supports a single stream where writes
1184  * are expected to access consecutive locations and keeps track of the
1185  * size of the area covered by the consecutive writes in byteCount.
1186  *
1187  * 1) When byteCount has surpassed the coalesceLimit the mode
1188  * switches from ALLOCATE to COALESCE where writes should be delayed
1189  * until the whole block is written at which point a single packet
1190  * (whole line write) can service them.
1191  *
1192  * 2) When byteCount has also exceeded the noAllocateLimit (whole
1193  * line) we switch to NO_ALLOCATE when writes should not allocate in
1194  * the cache but rather send a whole line write to the memory below.
1195  */
1196 class WriteAllocator : public SimObject {
1197   public:
1198     WriteAllocator(const WriteAllocatorParams *p) :
1199         SimObject(p),
1200         coalesceLimit(p->coalesce_limit * p->block_size),
1201         noAllocateLimit(p->no_allocate_limit * p->block_size),
1202         delayThreshold(p->delay_threshold)
1203     {
1204         reset();
1205     }
1206
1207     /**
1208      * Should writes be coalesced? This is true in every mode other
1209      * than ALLOCATE, i.e. in both COALESCE and NO_ALLOCATE.
1210      *
1211      * @return true if the cache should coalesce writes.
1212      */
1213     bool coalesce() const {
1214         return mode != WriteMode::ALLOCATE;
1215     }
1216
1217     /**
1218      * Should writes allocate? True in ALLOCATE and COALESCE mode.
1219      *
1220      * @return true if the cache should allocate for writes.
1221      */
1222     bool allocate() const {
1223         return mode != WriteMode::NO_ALLOCATE;
1224     }
1225
1226     /**
1227      * Reset the write allocator state, meaning that it allocates for
1228      * writes and has not recorded any information about qualifying
1229      * writes that might trigger a switch to coalescing and later no
1230      * allocation. The per-block delay counters are left untouched.
1231      */
1232     void reset() {
1233         mode = WriteMode::ALLOCATE;
1234         byteCount = 0;
1235         nextAddr = 0;
1236     }
1237
1238     /**
1239      * Assess whether we need to delay the current write. Each delayed
1240      * call consumes one unit of the block's delay counter.
1241      * @param blk_addr The block address the packet writes to
1242      * @return true if the current packet should be delayed
1243      */
1244     bool delay(Addr blk_addr) {
1245         // find(), not operator[]: querying must not insert untracked blocks
1246         const auto it = delayCtr.find(blk_addr);
1247         if (it == delayCtr.end() || it->second <= 0)
1248             return false;
1249         --it->second;
1250         return true;
1251     }
1252
1253     /**
1254      * Clear delay counter for the input block
1255      *
1256      * @param blk_addr The accessed cache block
1257      */
1258     void resetDelay(Addr blk_addr) {
1259         delayCtr.erase(blk_addr);
1260     }
1261
1262     /**
1263      * Update the write mode based on the current write
1264      * packet. This method compares the packet's address with any
1265      * current stream, and updates the tracking and the mode
1266      * accordingly.
1267      *
1268      * @param write_addr Start address of the write request
1269      * @param write_size Size of the write request
1270      * @param blk_addr The block address that this packet writes to
1271      */
1272     void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
1273
1274   private:
1275     /**
1276      * The current mode for write coalescing and allocation, either
1277      * normal operation (ALLOCATE), write coalescing (COALESCE), or
1278      * write coalescing without allocation (NO_ALLOCATE).
1279      */
1280     enum class WriteMode : char {
1281         ALLOCATE,
1282         COALESCE,
1283         NO_ALLOCATE,
1284     };
1285     WriteMode mode;
1286
1287     /** Address to match writes against to detect streams. */
1288     Addr nextAddr;
1289
1290     /**
1291      * Bytes written contiguously. Saturating once we no longer
1292      * allocate.
1293      */
1294     uint32_t byteCount;
1295
1296     /**
1297      * Limits for when to switch between the different write modes.
1298      */
1299     const uint32_t coalesceLimit;
1300     const uint32_t noAllocateLimit;
1301     /**
1302      * The number of times the allocator will delay a WriteReq MSHR.
1303      */
1304     const uint32_t delayThreshold;
1305
1306     /**
1307      * Keep track of the number of times the allocator has delayed a
1308      * WriteReq MSHR.
1309      */
1310     std::unordered_map<Addr, Counter> delayCtr;
1311 };
1312
1313 #endif //__MEM_CACHE_BASE_HH__