src/mem/cache/base.hh

   1 /*
   2  * Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited
   3  * All rights reserved.
   4  *
   5  * The license below extends only to copyright in the software and shall
   6  * not be construed as granting a license to any other intellectual
   7  * property including but not limited to intellectual property relating
   8  * to a hardware implementation of the functionality of the software
   9  * licensed hereunder.  You may use the software subject to the license
  10  * terms below provided that you ensure that this notice is replicated
  11  * unmodified and in its entirety in all distributions of the software,
  12  * modified or unmodified, in source code or in binary form.
  13  *
  14  * Copyright (c) 2003-2005 The Regents of The University of Michigan
  15  * All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or without
  18  * modification, are permitted provided that the following conditions are
  19  * met: redistributions of source code must retain the above copyright
  20  * notice, this list of conditions and the following disclaimer;
  21  * redistributions in binary form must reproduce the above copyright
  22  * notice, this list of conditions and the following disclaimer in the
  23  * documentation and/or other materials provided with the distribution;
  24  * neither the name of the copyright holders nor the names of its
  25  * contributors may be used to endorse or promote products derived from
  26  * this software without specific prior written permission.
  27  *
  28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * Authors: Erik Hallnor
  41  *          Steve Reinhardt
  42  *          Ron Dreslinski
  43  *          Andreas Hansson
  44  *          Nikos Nikoleris
  45  */
  46
  47 /**
  48  * @file
  49  * Declares a basic cache interface BaseCache.
  50  */
  51
  52 #ifndef __MEM_CACHE_BASE_HH__
  53 #define __MEM_CACHE_BASE_HH__
  54
  55 #include <cassert>
  56 #include <cstdint>
  57 #include <string>
  58
  59 #include "base/addr_range.hh"
  60 #include "base/statistics.hh"
  61 #include "base/trace.hh"
  62 #include "base/types.hh"
  63 #include "debug/Cache.hh"
  64 #include "debug/CachePort.hh"
  65 #include "enums/Clusivity.hh"
  66 #include "mem/cache/cache_blk.hh"
  67 #include "mem/cache/mshr_queue.hh"
  68 #include "mem/cache/tags/base.hh"
  69 #include "mem/cache/write_queue.hh"
  70 #include "mem/cache/write_queue_entry.hh"
  71 #include "mem/mem_object.hh"
  72 #include "mem/packet.hh"
  73 #include "mem/packet_queue.hh"
  74 #include "mem/qport.hh"
  75 #include "mem/request.hh"
  76 #include "params/WriteAllocator.hh"
  77 #include "sim/eventq.hh"
  78 #include "sim/probe/probe.hh"
  79 #include "sim/serialize.hh"
  80 #include "sim/sim_exit.hh"
  81 #include "sim/system.hh"
  82
  83 class BaseMasterPort;
  84 class BasePrefetcher;
  85 class BaseSlavePort;
  86 class MSHR;
  87 class MasterPort;
  88 class QueueEntry;
  89 struct BaseCacheParams;
  90
  91 /**
  92  * A basic cache interface. Implements some common functions for speed.
  93  */
  94 class BaseCache : public MemObject
  95 {
  96   protected:
  97     /**
  98      * Indexes to enumerate the MSHR queues.
  99      */
 100     enum MSHRQueueIndex {
 101         MSHRQueue_MSHRs,
 102         MSHRQueue_WriteBuffer
 103     };
 104
 105   public:
 106     /**
 107      * Reasons for caches to be blocked.
 108      */
 109     enum BlockedCause {
 110         Blocked_NoMSHRs = MSHRQueue_MSHRs,
 111         Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
 112         Blocked_NoTargets,
 113         NUM_BLOCKED_CAUSES
 114     };
 115
 116   protected:
 117
 118     /**
 119      * A cache master port is used for the memory-side port of the
 120      * cache, and in addition to the basic timing port that only sends
 121      * response packets through a transmit list, it also offers the
 122      * ability to schedule and send request packets (requests &
 123      * writebacks). The send event is scheduled through schedSendEvent,
 124      * and the sendDeferredPacket of the timing port is modified to
 125      * consider both the transmit list and the requests from the MSHR.
 126      */
 127     class CacheMasterPort : public QueuedMasterPort
 128     {
 129
 130       public:
 131
 132         /**
 133          * Schedule a send of a request packet (from the MSHR). Note
 134          * that we could already have a retry outstanding.
 135          */
 136         void schedSendEvent(Tick time)
 137         {
 138             DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
 139             reqQueue.schedSendEvent(time);
 140         }
 141
 142       protected:
 143
 144         CacheMasterPort(const std::string &_name, BaseCache *_cache,
 145                         ReqPacketQueue &_reqQueue,
 146                         SnoopRespPacketQueue &_snoopRespQueue) :
 147             QueuedMasterPort(_name, _cache, _reqQueue, _snoopRespQueue)
 148         { }
 149
 150         /**
 151          * Memory-side port always snoops.
 152          *
 153          * @return always true
 154          */
 155         virtual bool isSnooping() const { return true; }
 156     };
 157
 158     /**
 159      * Override the default behaviour of sendDeferredPacket to enable
 160      * the memory-side cache port to also send requests based on the
 161      * current MSHR status. This queue has a pointer to our specific
 162      * cache implementation and is used by the MemSidePort.
 163      */
 164     class CacheReqPacketQueue : public ReqPacketQueue
 165     {
 166
 167       protected:
 168
 169         BaseCache &cache;
 170         SnoopRespPacketQueue &snoopRespQueue;
 171
 172       public:
 173
 174         CacheReqPacketQueue(BaseCache &cache, MasterPort &port,
 175                             SnoopRespPacketQueue &snoop_resp_queue,
 176                             const std::string &label) :
 177             ReqPacketQueue(cache, port, label), cache(cache),
 178             snoopRespQueue(snoop_resp_queue) { }
 179
 180         /**
 181          * Override the normal sendDeferredPacket and do not only
 182          * consider the transmit list (used for responses), but also
 183          * requests.
 184          */
 185         virtual void sendDeferredPacket();
 186
 187         /**
 188          * Check if there is a conflicting snoop response about to be
 189          * send out, and if so simply stall any requests, and schedule
 190          * a send event at the same time as the next snoop response is
 191          * being sent out.
 192          *
 193          * @param pkt The packet to check for conflicts against.
 194          */
 195         bool checkConflictingSnoop(const PacketPtr pkt)
 196         {
 197             if (snoopRespQueue.checkConflict(pkt, cache.blkSize)) {
 198                 DPRINTF(CachePort, "Waiting for snoop response to be "
 199                         "sent\n");
 200                 Tick when = snoopRespQueue.deferredPacketReadyTime();
 201                 schedSendEvent(when);
 202                 return true;
 203             }
 204             return false;
 205         }
 206     };
 207
 208
 209     /**
 210      * The memory-side port extends the base cache master port with
 211      * access functions for functional, atomic and timing snoops.
 212      */
 213     class MemSidePort : public CacheMasterPort
 214     {
 215       private:
 216
 217         /** The cache-specific queue. */
 218         CacheReqPacketQueue _reqQueue;
 219
 220         SnoopRespPacketQueue _snoopRespQueue;
 221
 222         // a pointer to our specific cache implementation
 223         BaseCache *cache;
 224
 225       protected:
 226
 227         virtual void recvTimingSnoopReq(PacketPtr pkt);
 228
 229         virtual bool recvTimingResp(PacketPtr pkt);
 230
 231         virtual Tick recvAtomicSnoop(PacketPtr pkt);
 232
 233         virtual void recvFunctionalSnoop(PacketPtr pkt);
 234
 235       public:
 236
 237         MemSidePort(const std::string &_name, BaseCache *_cache,
 238                     const std::string &_label);
 239     };
 240
 241     /**
 242      * A cache slave port is used for the CPU-side port of the cache,
 243      * and it is basically a simple timing port that uses a transmit
 244      * list for responses to the CPU (or connected master). In
 245      * addition, it has the functionality to block the port for
 246      * incoming requests. If blocked, the port will issue a retry once
 247      * unblocked.
 248      */
 249     class CacheSlavePort : public QueuedSlavePort
 250     {
 251
 252       public:
 253
 254         /** Do not accept any new requests. */
 255         void setBlocked();
 256
 257         /** Return to normal operation and accept new requests. */
 258         void clearBlocked();
 259
 260         bool isBlocked() const { return blocked; }
 261
 262       protected:
 263
 264         CacheSlavePort(const std::string &_name, BaseCache *_cache,
 265                        const std::string &_label);
 266
 267         /** A normal packet queue used to store responses. */
 268         RespPacketQueue queue;
 269
 270         bool blocked;
 271
 272         bool mustSendRetry;
 273
 274       private:
 275
 276         void processSendRetry();
 277
 278         EventFunctionWrapper sendRetryEvent;
 279
 280     };
 281
 282     /**
 283      * The CPU-side port extends the base cache slave port with access
 284      * functions for functional, atomic and timing requests.
 285      */
 286     class CpuSidePort : public CacheSlavePort
 287     {
 288       private:
 289
 290         // a pointer to our specific cache implementation
 291         BaseCache *cache;
 292
 293       protected:
 294         virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
 295
 296         virtual bool tryTiming(PacketPtr pkt) override;
 297
 298         virtual bool recvTimingReq(PacketPtr pkt) override;
 299
 300         virtual Tick recvAtomic(PacketPtr pkt) override;
 301
 302         virtual void recvFunctional(PacketPtr pkt) override;
 303
 304         virtual AddrRangeList getAddrRanges() const override;
 305
 306       public:
 307
 308         CpuSidePort(const std::string &_name, BaseCache *_cache,
 309                     const std::string &_label);
 310
 311     };
 312
 313     CpuSidePort cpuSidePort;
 314     MemSidePort memSidePort;
 315
 316   protected:
 317
 318     /** Miss status registers */
 319     MSHRQueue mshrQueue;
 320
 321     /** Write/writeback buffer */
 322     WriteQueue writeBuffer;
 323
 324     /** Tag and data Storage */
 325     BaseTags *tags;
 326
 327     /** Prefetcher */
 328     BasePrefetcher *prefetcher;
 329
 330     /** To probe when a cache hit occurs */
 331     ProbePointArg<PacketPtr> *ppHit;
 332
 333     /** To probe when a cache miss occurs */
 334     ProbePointArg<PacketPtr> *ppMiss;
 335
 336     /** To probe when a cache fill occurs */
 337     ProbePointArg<PacketPtr> *ppFill;
 338
 339     /**
 340      * The writeAllocator drive optimizations for streaming writes.
 341      * It first determines whether a WriteReq MSHR should be delayed,
 342      * thus ensuring that we wait longer in cases when we are write
 343      * coalescing and allowing all the bytes of the line to be written
 344      * before the MSHR packet is sent downstream. This works in unison
 345      * with the tracking in the MSHR to check if the entire line is
 346      * written. The write mode also affects the behaviour on filling
 347      * any whole-line writes. Normally the cache allocates the line
 348      * when receiving the InvalidateResp, but after seeing enough
 349      * consecutive lines we switch to using the tempBlock, and thus
 350      * end up not allocating the line, and instead turning the
 351      * whole-line write into a writeback straight away.
 352      */
 353     WriteAllocator * const writeAllocator;
 354
 355     /**
 356      * Temporary cache block for occasional transitory use.  We use
 357      * the tempBlock to fill when allocation fails (e.g., when there
 358      * is an outstanding request that accesses the victim block) or
 359      * when we want to avoid allocation (e.g., exclusive caches)
 360      */
 361     TempCacheBlk *tempBlock;
 362
 363     /**
 364      * Upstream caches need this packet until true is returned, so
 365      * hold it for deletion until a subsequent call
 366      */
 367     std::unique_ptr<Packet> pendingDelete;
 368
 369     /**
 370      * Mark a request as in service (sent downstream in the memory
 371      * system), effectively making this MSHR the ordering point.
 372      */
 373     void markInService(MSHR *mshr, bool pending_modified_resp)
 374     {
 375         bool wasFull = mshrQueue.isFull();
 376         mshrQueue.markInService(mshr, pending_modified_resp);
 377
 378         if (wasFull && !mshrQueue.isFull()) {
 379             clearBlocked(Blocked_NoMSHRs);
 380         }
 381     }
 382
 383     void markInService(WriteQueueEntry *entry)
 384     {
 385         bool wasFull = writeBuffer.isFull();
 386         writeBuffer.markInService(entry);
 387
 388         if (wasFull && !writeBuffer.isFull()) {
 389             clearBlocked(Blocked_NoWBBuffers);
 390         }
 391     }
 392
 393     /**
 394      * Determine whether we should allocate on a fill or not. If this
 395      * cache is mostly inclusive with regards to the upstream cache(s)
 396      * we always allocate (for any non-forwarded and cacheable
 397      * requests). In the case of a mostly exclusive cache, we allocate
 398      * on fill if the packet did not come from a cache, thus if we:
 399      * are dealing with a whole-line write (the latter behaves much
 400      * like a writeback), the original target packet came from a
 401      * non-caching source, or if we are performing a prefetch or LLSC.
 402      *
 403      * @param cmd Command of the incoming requesting packet
 404      * @return Whether we should allocate on the fill
 405      */
 406     inline bool allocOnFill(MemCmd cmd) const
 407     {
 408         return clusivity == Enums::mostly_incl ||
 409             cmd == MemCmd::WriteLineReq ||
 410             cmd == MemCmd::ReadReq ||
 411             cmd == MemCmd::WriteReq ||
 412             cmd.isPrefetch() ||
 413             cmd.isLLSC();
 414     }
 415
 416     /**
 417      * Regenerate block address using tags.
 418      * Block address regeneration depends on whether we're using a temporary
 419      * block or not.
 420      *
 421      * @param blk The block to regenerate address.
 422      * @return The block's address.
 423      */
 424     Addr regenerateBlkAddr(CacheBlk* blk);
 425
 426     /**
 427      * Calculate latency of accesses that only touch the tag array.
 428      * @sa calculateAccessLatency
 429      *
 430      * @param delay The delay until the packet's metadata is present.
 431      * @param lookup_lat Latency of the respective tag lookup.
 432      * @return The number of ticks that pass due to a tag-only access.
 433      */
 434     Cycles calculateTagOnlyLatency(const uint32_t delay,
 435                                    const Cycles lookup_lat) const;
 436     /**
 437      * Calculate access latency in ticks given a tag lookup latency, and
 438      * whether access was a hit or miss.
 439      *
 440      * @param blk The cache block that was accessed.
 441      * @param delay The delay until the packet's metadata is present.
 442      * @param lookup_lat Latency of the respective tag lookup.
 443      * @return The number of ticks that pass due to a block access.
 444      */
 445     Cycles calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
 446                                   const Cycles lookup_lat) const;
 447
 448     /**
 449      * Does all the processing necessary to perform the provided request.
 450      * @param pkt The memory request to perform.
 451      * @param blk The cache block to be updated.
 452      * @param lat The latency of the access.
 453      * @param writebacks List for any writebacks that need to be performed.
 454      * @return Boolean indicating whether the request was satisfied.
 455      */
 456     virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
 457                         PacketList &writebacks);
 458
 459     /*
 460      * Handle a timing request that hit in the cache
 461      *
 462      * @param ptk The request packet
 463      * @param blk The referenced block
 464      * @param request_time The tick at which the block lookup is compete
 465      */
 466     virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
 467                                     Tick request_time);
 468
 469     /*
 470      * Handle a timing request that missed in the cache
 471      *
 472      * Implementation specific handling for different cache
 473      * implementations
 474      *
 475      * @param ptk The request packet
 476      * @param blk The referenced block
 477      * @param forward_time The tick at which we can process dependent requests
 478      * @param request_time The tick at which the block lookup is compete
 479      */
 480     virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
 481                                      Tick forward_time,
 482                                      Tick request_time) = 0;
 483
 484     /*
 485      * Handle a timing request that missed in the cache
 486      *
 487      * Common functionality across different cache implementations
 488      *
 489      * @param ptk The request packet
 490      * @param blk The referenced block
 491      * @param mshr Any existing mshr for the referenced cache block
 492      * @param forward_time The tick at which we can process dependent requests
 493      * @param request_time The tick at which the block lookup is compete
 494      */
 495     void handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
 496                              Tick forward_time, Tick request_time);
 497
 498     /**
 499      * Performs the access specified by the request.
 500      * @param pkt The request to perform.
 501      */
 502     virtual void recvTimingReq(PacketPtr pkt);
 503
 504     /**
 505      * Handling the special case of uncacheable write responses to
 506      * make recvTimingResp less cluttered.
 507      */
 508     void handleUncacheableWriteResp(PacketPtr pkt);
 509
 510     /**
 511      * Service non-deferred MSHR targets using the received response
 512      *
 513      * Iterates through the list of targets that can be serviced with
 514      * the current response.
 515      *
 516      * @param mshr The MSHR that corresponds to the reponse
 517      * @param pkt The response packet
 518      * @param blk The reference block
 519      */
 520     virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
 521                                     CacheBlk *blk) = 0;
 522
 523     /**
 524      * Handles a response (cache line fill/write ack) from the bus.
 525      * @param pkt The response packet
 526      */
 527     virtual void recvTimingResp(PacketPtr pkt);
 528
 529     /**
 530      * Snoops bus transactions to maintain coherence.
 531      * @param pkt The current bus transaction.
 532      */
 533     virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
 534
 535     /**
 536      * Handle a snoop response.
 537      * @param pkt Snoop response packet
 538      */
 539     virtual void recvTimingSnoopResp(PacketPtr pkt) = 0;
 540
 541     /**
 542      * Handle a request in atomic mode that missed in this cache
 543      *
 544      * Creates a downstream request, sends it to the memory below and
 545      * handles the response. As we are in atomic mode all operations
 546      * are performed immediately.
 547      *
 548      * @param pkt The packet with the requests
 549      * @param blk The referenced block
 550      * @param writebacks A list with packets for any performed writebacks
 551      * @return Cycles for handling the request
 552      */
 553     virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
 554                                        PacketList &writebacks) = 0;
 555
 556     /**
 557      * Performs the access specified by the request.
 558      * @param pkt The request to perform.
 559      * @return The number of ticks required for the access.
 560      */
 561     virtual Tick recvAtomic(PacketPtr pkt);
 562
 563     /**
 564      * Snoop for the provided request in the cache and return the estimated
 565      * time taken.
 566      * @param pkt The memory request to snoop
 567      * @return The number of ticks required for the snoop.
 568      */
 569     virtual Tick recvAtomicSnoop(PacketPtr pkt) = 0;
 570
 571     /**
 572      * Performs the access specified by the request.
 573      *
 574      * @param pkt The request to perform.
 575      * @param fromCpuSide from the CPU side port or the memory side port
 576      */
 577     virtual void functionalAccess(PacketPtr pkt, bool from_cpu_side);
 578
 579     /**
 580      * Handle doing the Compare and Swap function for SPARC.
 581      */
 582     void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
 583
 584     /**
 585      * Return the next queue entry to service, either a pending miss
 586      * from the MSHR queue, a buffered write from the write buffer, or
 587      * something from the prefetcher. This function is responsible
 588      * for prioritizing among those sources on the fly.
 589      */
 590     QueueEntry* getNextQueueEntry();
 591
 592     /**
 593      * Insert writebacks into the write buffer
 594      */
 595     virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
 596
 597     /**
 598      * Send writebacks down the memory hierarchy in atomic mode
 599      */
 600     virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
 601
 602     /**
 603      * Create an appropriate downstream bus request packet.
 604      *
 605      * Creates a new packet with the request to be send to the memory
 606      * below, or nullptr if the current request in cpu_pkt should just
 607      * be forwarded on.
 608      *
 609      * @param cpu_pkt The miss packet that needs to be satisfied.
 610      * @param blk The referenced block, can be nullptr.
 611      * @param needs_writable Indicates that the block must be writable
 612      * even if the request in cpu_pkt doesn't indicate that.
 613      * @param is_whole_line_write True if there are writes for the
 614      * whole line
 615      * @return A packet send to the memory below
 616      */
 617     virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
 618                                        bool needs_writable,
 619                                        bool is_whole_line_write) const = 0;
 620
 621     /**
 622      * Determine if clean lines should be written back or not. In
 623      * cases where a downstream cache is mostly inclusive we likely
 624      * want it to act as a victim cache also for lines that have not
 625      * been modified. Hence, we cannot simply drop the line (or send a
 626      * clean evict), but rather need to send the actual data.
 627      */
 628     const bool writebackClean;
 629
 630     /**
 631      * Writebacks from the tempBlock, resulting on the response path
 632      * in atomic mode, must happen after the call to recvAtomic has
 633      * finished (for the right ordering of the packets). We therefore
 634      * need to hold on to the packets, and have a method and an event
 635      * to send them.
 636      */
 637     PacketPtr tempBlockWriteback;
 638
 639     /**
 640      * Send the outstanding tempBlock writeback. To be called after
 641      * recvAtomic finishes in cases where the block we filled is in
 642      * fact the tempBlock, and now needs to be written back.
 643      */
 644     void writebackTempBlockAtomic() {
 645         assert(tempBlockWriteback != nullptr);
 646         PacketList writebacks{tempBlockWriteback};
 647         doWritebacksAtomic(writebacks);
 648         tempBlockWriteback = nullptr;
 649     }
 650
 651     /**
 652      * An event to writeback the tempBlock after recvAtomic
 653      * finishes. To avoid other calls to recvAtomic getting in
 654      * between, we create this event with a higher priority.
 655      */
 656     EventFunctionWrapper writebackTempBlockAtomicEvent;
 657
 658     /**
 659      * Perform any necessary updates to the block and perform any data
 660      * exchange between the packet and the block. The flags of the
 661      * packet are also set accordingly.
 662      *
 663      * @param pkt Request packet from upstream that hit a block
 664      * @param blk Cache block that the packet hit
 665      * @param deferred_response Whether this request originally missed
 666      * @param pending_downgrade Whether the writable flag is to be removed
 667      */
 668     virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
 669                                 bool deferred_response = false,
 670                                 bool pending_downgrade = false);
 671
 672     /**
 673      * Maintain the clusivity of this cache by potentially
 674      * invalidating a block. This method works in conjunction with
 675      * satisfyRequest, but is separate to allow us to handle all MSHR
 676      * targets before potentially dropping a block.
 677      *
 678      * @param from_cache Whether we have dealt with a packet from a cache
 679      * @param blk The block that should potentially be dropped
 680      */
 681     void maintainClusivity(bool from_cache, CacheBlk *blk);
 682
 683     /**
 684      * Handle a fill operation caused by a received packet.
 685      *
 686      * Populates a cache block and handles all outstanding requests for the
 687      * satisfied fill request. This version takes two memory requests. One
 688      * contains the fill data, the other is an optional target to satisfy.
 689      * Note that the reason we return a list of writebacks rather than
 690      * inserting them directly in the write buffer is that this function
 691      * is called by both atomic and timing-mode accesses, and in atomic
 692      * mode we don't mess with the write buffer (we just perform the
 693      * writebacks atomically once the original request is complete).
 694      *
 695      * @param pkt The memory request with the fill data.
 696      * @param blk The cache block if it already exists.
 697      * @param writebacks List for any writebacks that need to be performed.
 698      * @param allocate Whether to allocate a block or use the temp block
 699      * @return Pointer to the new cache block.
 700      */
 701     CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
 702                          PacketList &writebacks, bool allocate);
 703
 704     /**
 705      * Allocate a new block and perform any necessary writebacks
 706      *
 707      * Find a victim block and if necessary prepare writebacks for any
 708      * existing data. May return nullptr if there are no replaceable
 709      * blocks. If a replaceable block is found, it inserts the new block in
 710      * its place. The new block, however, is not set as valid yet.
 711      *
 712      * @param pkt Packet holding the address to update
 713      * @param writebacks A list of writeback packets for the evicted blocks
 714      * @return the allocated block
 715      */
 716     CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
 717     /**
 718      * Evict a cache block.
 719      *
 720      * Performs a writeback if necesssary and invalidates the block
 721      *
 722      * @param blk Block to invalidate
 723      * @return A packet with the writeback, can be nullptr
 724      */
 725     M5_NODISCARD virtual PacketPtr evictBlock(CacheBlk *blk) = 0;
 726
 727     /**
 728      * Evict a cache block.
 729      *
 730      * Performs a writeback if necesssary and invalidates the block
 731      *
 732      * @param blk Block to invalidate
 733      * @param writebacks Return a list of packets with writebacks
 734      */
 735     void evictBlock(CacheBlk *blk, PacketList &writebacks);
 736
 737     /**
 738      * Invalidate a cache block.
 739      *
 740      * @param blk Block to invalidate
 741      */
 742     void invalidateBlock(CacheBlk *blk);
 743
 744     /**
 745      * Create a writeback request for the given block.
 746      *
 747      * @param blk The block to writeback.
 748      * @return The writeback request for the block.
 749      */
 750     PacketPtr writebackBlk(CacheBlk *blk);
 751
 752     /**
 753      * Create a writeclean request for the given block.
 754      *
 755      * Creates a request that writes the block to the cache below
 756      * without evicting the block from the current cache.
 757      *
 758      * @param blk The block to write clean.
 759      * @param dest The destination of the write clean operation.
 760      * @param id Use the given packet id for the write clean operation.
 761      * @return The generated write clean packet.
 762      */
 763     PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
 764
 765     /**
 766      * Write back dirty blocks in the cache using functional accesses.
 767      */
 768     virtual void memWriteback() override;
 769
 770     /**
 771      * Invalidates all blocks in the cache.
 772      *
 773      * @warn Dirty cache lines will not be written back to
 774      * memory. Make sure to call functionalWriteback() first if you
 775      * want the to write them to memory.
 776      */
 777     virtual void memInvalidate() override;
 778
 779     /**
 780      * Determine if there are any dirty blocks in the cache.
 781      *
 782      * @return true if at least one block is dirty, false otherwise.
 783      */
 784     bool isDirty() const;
 785
 786     /**
 787      * Determine if an address is in the ranges covered by this
 788      * cache. This is useful to filter snoops.
 789      *
 790      * @param addr Address to check against
 791      *
 792      * @return If the address in question is in range
 793      */
 794     bool inRange(Addr addr) const;
 795
 796     /**
 797      * Find next request ready time from among possible sources.
 798      */
 799     Tick nextQueueReadyTime() const;
 800
 801     /** Block size of this cache */
 802     const unsigned blkSize;
 803
 804     /**
 805      * The latency of tag lookup of a cache. It occurs when there is
 806      * an access to the cache.
 807      */
 808     const Cycles lookupLatency;
 809
 810     /**
 811      * The latency of data access of a cache. It occurs when there is
 812      * an access to the cache.
 813      */
 814     const Cycles dataLatency;
 815
 816     /**
 817      * This is the forward latency of the cache. It occurs when there
 818      * is a cache miss and a request is forwarded downstream, in
 819      * particular an outbound miss.
 820      */
 821     const Cycles forwardLatency;
 822
 823     /** The latency to fill a cache block */
 824     const Cycles fillLatency;
 825
 826     /**
 827      * The latency of sending reponse to its upper level cache/core on
 828      * a linefill. The responseLatency parameter captures this
 829      * latency.
 830      */
 831     const Cycles responseLatency;
 832
 833     /**
 834      * Whether tags and data are accessed sequentially.
 835      */
 836     const bool sequentialAccess;
 837
 838     /** The number of targets for each MSHR. */
 839     const int numTarget;
 840
 841     /** Do we forward snoops from mem side port through to cpu side port? */
 842     bool forwardSnoops;
 843
 844     /**
 845      * Clusivity with respect to the upstream cache, determining if we
 846      * fill into both this cache and the cache above on a miss. Note
 847      * that we currently do not support strict clusivity policies.
 848      */
 849     const Enums::Clusivity clusivity;
 850
 851     /**
 852      * Is this cache read only, for example the instruction cache, or
 853      * table-walker cache. A cache that is read only should never see
 854      * any writes, and should never get any dirty data (and hence
 855      * never have to do any writebacks).
 856      */
 857     const bool isReadOnly;
 858
 859     /**
 860      * Bit vector of the blocking reasons for the access path.
 861      * @sa #BlockedCause
 862      */
 863     uint8_t blocked;
 864
 865     /** Increasing order number assigned to each incoming request. */
 866     uint64_t order;
 867
 868     /** Stores time the cache blocked for statistics. */
 869     Cycles blockedCycle;
 870
 871     /** Pointer to the MSHR that has no targets. */
 872     MSHR *noTargetMSHR;
 873
 874     /** The number of misses to trigger an exit event. */
 875     Counter missCount;
 876
 877     /**
 878      * The address range to which the cache responds on the CPU side.
 879      * Normally this is all possible memory addresses. */
 880     const AddrRangeList addrRanges;
 881
 882   public:
 883     /** System we are currently operating in. */
 884     System *system;
 885
 886     // Statistics
 887     /**
 888      * @addtogroup CacheStatistics
 889      * @{
 890      */
 891
 892     /** Number of hits per thread for each type of command.
 893         @sa Packet::Command */
 894     Stats::Vector hits[MemCmd::NUM_MEM_CMDS];
 895     /** Number of hits for demand accesses. */
 896     Stats::Formula demandHits;
 897     /** Number of hit for all accesses. */
 898     Stats::Formula overallHits;
 899
 900     /** Number of misses per thread for each type of command.
 901         @sa Packet::Command */
 902     Stats::Vector misses[MemCmd::NUM_MEM_CMDS];
 903     /** Number of misses for demand accesses. */
 904     Stats::Formula demandMisses;
 905     /** Number of misses for all accesses. */
 906     Stats::Formula overallMisses;
 907
 908     /**
 909      * Total number of cycles per thread/command spent waiting for a miss.
 910      * Used to calculate the average miss latency.
 911      */
 912     Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS];
 913     /** Total number of cycles spent waiting for demand misses. */
 914     Stats::Formula demandMissLatency;
 915     /** Total number of cycles spent waiting for all misses. */
 916     Stats::Formula overallMissLatency;
 917
 918     /** The number of accesses per command and thread. */
 919     Stats::Formula accesses[MemCmd::NUM_MEM_CMDS];
 920     /** The number of demand accesses. */
 921     Stats::Formula demandAccesses;
 922     /** The number of overall accesses. */
 923     Stats::Formula overallAccesses;
 924
 925     /** The miss rate per command and thread. */
 926     Stats::Formula missRate[MemCmd::NUM_MEM_CMDS];
 927     /** The miss rate of all demand accesses. */
 928     Stats::Formula demandMissRate;
 929     /** The miss rate for all accesses. */
 930     Stats::Formula overallMissRate;
 931
 932     /** The average miss latency per command and thread. */
 933     Stats::Formula avgMissLatency[MemCmd::NUM_MEM_CMDS];
 934     /** The average miss latency for demand misses. */
 935     Stats::Formula demandAvgMissLatency;
 936     /** The average miss latency for all misses. */
 937     Stats::Formula overallAvgMissLatency;
 938
 939     /** The total number of cycles blocked for each blocked cause. */
 940     Stats::Vector blocked_cycles;
 941     /** The number of times this cache blocked for each blocked cause. */
 942     Stats::Vector blocked_causes;
 943
 944     /** The average number of cycles blocked for each blocked cause. */
 945     Stats::Formula avg_blocked;
 946
 947     /** The number of times a HW-prefetched block is evicted w/o reference. */
 948     Stats::Scalar unusedPrefetches;
 949
 950     /** Number of blocks written back per thread. */
 951     Stats::Vector writebacks;
 952
 953     /** Number of misses that hit in the MSHRs per command and thread. */
 954     Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS];
 955     /** Demand misses that hit in the MSHRs. */
 956     Stats::Formula demandMshrHits;
 957     /** Total number of misses that hit in the MSHRs. */
 958     Stats::Formula overallMshrHits;
 959
 960     /** Number of misses that miss in the MSHRs, per command and thread. */
 961     Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS];
 962     /** Demand misses that miss in the MSHRs. */
 963     Stats::Formula demandMshrMisses;
 964     /** Total number of misses that miss in the MSHRs. */
 965     Stats::Formula overallMshrMisses;
 966
 967     /** Number of misses that miss in the MSHRs, per command and thread. */
 968     Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
 969     /** Total number of misses that miss in the MSHRs. */
 970     Stats::Formula overallMshrUncacheable;
 971
 972     /** Total cycle latency of each MSHR miss, per command and thread. */
 973     Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
 974     /** Total cycle latency of demand MSHR misses. */
 975     Stats::Formula demandMshrMissLatency;
 976     /** Total cycle latency of overall MSHR misses. */
 977     Stats::Formula overallMshrMissLatency;
 978
 979     /** Total cycle latency of each MSHR miss, per command and thread. */
 980     Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
 981     /** Total cycle latency of overall MSHR misses. */
 982     Stats::Formula overallMshrUncacheableLatency;
 983
 984 #if 0
 985     /** The total number of MSHR accesses per command and thread. */
 986     Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
 987     /** The total number of demand MSHR accesses. */
 988     Stats::Formula demandMshrAccesses;
 989     /** The total number of MSHR accesses. */
 990     Stats::Formula overallMshrAccesses;
 991 #endif
 992
 993     /** The miss rate in the MSHRs pre command and thread. */
 994     Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
 995     /** The demand miss rate in the MSHRs. */
 996     Stats::Formula demandMshrMissRate;
 997     /** The overall miss rate in the MSHRs. */
 998     Stats::Formula overallMshrMissRate;
 999
1000     /** The average latency of an MSHR miss, per command and thread. */
1001     Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
1002     /** The average latency of a demand MSHR miss. */
1003     Stats::Formula demandAvgMshrMissLatency;
1004     /** The average overall latency of an MSHR miss. */
1005     Stats::Formula overallAvgMshrMissLatency;
1006
1007     /** The average latency of an MSHR miss, per command and thread. */
1008     Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
1009     /** The average overall latency of an MSHR miss. */
1010     Stats::Formula overallAvgMshrUncacheableLatency;
1011
1012     /** Number of replacements of valid blocks. */
1013     Stats::Scalar replacements;
1014
1015     /**
1016      * @}
1017      */
1018
1019     /**
1020      * Register stats for this object.
1021      */
1022     void regStats() override;
1023
1024     /** Registers probes. */
1025     void regProbePoints() override;
1026
1027   public:
1028     BaseCache(const BaseCacheParams *p, unsigned blk_size);
1029     ~BaseCache();
1030
1031     void init() override;
1032
1033     Port &getPort(const std::string &if_name,
1034                   PortID idx=InvalidPortID) override;
1035
1036     /**
1037      * Query block size of a cache.
1038      * @return  The block size
1039      */
1040     unsigned
1041     getBlockSize() const
1042     {
1043         return blkSize;
1044     }
1045
1046     const AddrRangeList &getAddrRanges() const { return addrRanges; }
1047
1048     MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1049     {
1050         MSHR *mshr = mshrQueue.allocate(pkt->getBlockAddr(blkSize), blkSize,
1051                                         pkt, time, order++,
1052                                         allocOnFill(pkt->cmd));
1053
1054         if (mshrQueue.isFull()) {
1055             setBlocked((BlockedCause)MSHRQueue_MSHRs);
1056         }
1057
1058         if (sched_send) {
1059             // schedule the send
1060             schedMemSideSendEvent(time);
1061         }
1062
1063         return mshr;
1064     }
1065
1066     void allocateWriteBuffer(PacketPtr pkt, Tick time)
1067     {
1068         // should only see writes or clean evicts here
1069         assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);
1070
1071         Addr blk_addr = pkt->getBlockAddr(blkSize);
1072
1073         WriteQueueEntry *wq_entry =
1074             writeBuffer.findMatch(blk_addr, pkt->isSecure());
1075         if (wq_entry && !wq_entry->inService) {
1076             DPRINTF(Cache, "Potential to merge writeback %s", pkt->print());
1077         }
1078
1079         writeBuffer.allocate(blk_addr, blkSize, pkt, time, order++);
1080
1081         if (writeBuffer.isFull()) {
1082             setBlocked((BlockedCause)MSHRQueue_WriteBuffer);
1083         }
1084
1085         // schedule the send
1086         schedMemSideSendEvent(time);
1087     }
1088
1089     /**
1090      * Returns true if the cache is blocked for accesses.
1091      */
1092     bool isBlocked() const
1093     {
1094         return blocked != 0;
1095     }
1096
1097     /**
1098      * Marks the access path of the cache as blocked for the given cause. This
1099      * also sets the blocked flag in the slave interface.
1100      * @param cause The reason for the cache blocking.
1101      */
1102     void setBlocked(BlockedCause cause)
1103     {
1104         uint8_t flag = 1 << cause;
1105         if (blocked == 0) {
1106             blocked_causes[cause]++;
1107             blockedCycle = curCycle();
1108             cpuSidePort.setBlocked();
1109         }
1110         blocked |= flag;
1111         DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
1112     }
1113
1114     /**
1115      * Marks the cache as unblocked for the given cause. This also clears the
1116      * blocked flags in the appropriate interfaces.
1117      * @param cause The newly unblocked cause.
1118      * @warning Calling this function can cause a blocked request on the bus to
1119      * access the cache. The cache must be in a state to handle that request.
1120      */
1121     void clearBlocked(BlockedCause cause)
1122     {
1123         uint8_t flag = 1 << cause;
1124         blocked &= ~flag;
1125         DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
1126         if (blocked == 0) {
1127             blocked_cycles[cause] += curCycle() - blockedCycle;
1128             cpuSidePort.clearBlocked();
1129         }
1130     }
1131
1132     /**
1133      * Schedule a send event for the memory-side port. If already
1134      * scheduled, this may reschedule the event at an earlier
1135      * time. When the specified time is reached, the port is free to
1136      * send either a response, a request, or a prefetch request.
1137      *
1138      * @param time The time when to attempt sending a packet.
1139      */
1140     void schedMemSideSendEvent(Tick time)
1141     {
1142         memSidePort.schedSendEvent(time);
1143     }
1144
1145     bool inCache(Addr addr, bool is_secure) const {
1146         return tags->findBlock(addr, is_secure);
1147     }
1148
1149     bool hasBeenPrefetched(Addr addr, bool is_secure) const {
1150         CacheBlk *block = tags->findBlock(addr, is_secure);
1151         if (block) {
1152             return block->wasPrefetched();
1153         } else {
1154             return false;
1155         }
1156     }
1157
1158     bool inMissQueue(Addr addr, bool is_secure) const {
1159         return mshrQueue.findMatch(addr, is_secure);
1160     }
1161
1162     void incMissCount(PacketPtr pkt)
1163     {
1164         assert(pkt->req->masterId() < system->maxMasters());
1165         misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
1166         pkt->req->incAccessDepth();
1167         if (missCount) {
1168             --missCount;
1169             if (missCount == 0)
1170                 exitSimLoop("A cache reached the maximum miss count");
1171         }
1172     }
1173     void incHitCount(PacketPtr pkt)
1174     {
1175         assert(pkt->req->masterId() < system->maxMasters());
1176         hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
1177
1178     }
1179
1180     /**
1181      * Checks if the cache is coalescing writes
1182      *
1183      * @return True if the cache is coalescing writes
1184      */
1185     bool coalesce() const;
1186
1187
1188     /**
1189      * Cache block visitor that writes back dirty cache blocks using
1190      * functional writes.
1191      */
1192     void writebackVisitor(CacheBlk &blk);
1193
1194     /**
1195      * Cache block visitor that invalidates all blocks in the cache.
1196      *
1197      * @warn Dirty cache lines will not be written back to memory.
1198      */
1199     void invalidateVisitor(CacheBlk &blk);
1200
1201     /**
1202      * Take an MSHR, turn it into a suitable downstream packet, and
1203      * send it out. This construct allows a queue entry to choose a suitable
1204      * approach based on its type.
1205      *
1206      * @param mshr The MSHR to turn into a packet and send
1207      * @return True if the port is waiting for a retry
1208      */
1209     virtual bool sendMSHRQueuePacket(MSHR* mshr);
1210
1211     /**
1212      * Similar to sendMSHR, but for a write-queue entry
1213      * instead. Create the packet, and send it, and if successful also
1214      * mark the entry in service.
1215      *
1216      * @param wq_entry The write-queue entry to turn into a packet and send
1217      * @return True if the port is waiting for a retry
1218      */
1219     bool sendWriteQueuePacket(WriteQueueEntry* wq_entry);
1220
1221     /**
1222      * Serialize the state of the caches
1223      *
1224      * We currently don't support checkpointing cache state, so this panics.
1225      */
1226     void serialize(CheckpointOut &cp) const override;
1227     void unserialize(CheckpointIn &cp) override;
1228 };
1229
1230 /**
1231  * The write allocator inspects write packets and detects streaming
1232  * patterns. The write allocator supports a single stream where writes
1233  * are expected to access consecutive locations and keeps track of
1234  * size of the area covered by the concecutive writes in byteCount.
1235  *
1236  * 1) When byteCount has surpassed the coallesceLimit the mode
1237  * switches from ALLOCATE to COALESCE where writes should be delayed
1238  * until the whole block is written at which point a single packet
1239  * (whole line write) can service them.
1240  *
1241  * 2) When byteCount has also exceeded the noAllocateLimit (whole
1242  * line) we switch to NO_ALLOCATE when writes should not allocate in
1243  * the cache but rather send a whole line write to the memory below.
1244  */
1245 class WriteAllocator : public SimObject {
1246   public:
1247     WriteAllocator(const WriteAllocatorParams *p) :
1248         SimObject(p),
1249         coalesceLimit(p->coalesce_limit * p->block_size),
1250         noAllocateLimit(p->no_allocate_limit * p->block_size),
1251         delayThreshold(p->delay_threshold)
1252     {
1253         reset();
1254     }
1255
1256     /**
1257      * Should writes be coalesced? This is true if the mode is set to
1258      * NO_ALLOCATE.
1259      *
1260      * @return return true if the cache should coalesce writes.
1261      */
1262     bool coalesce() const {
1263         return mode != WriteMode::ALLOCATE;
1264     }
1265
1266     /**
1267      * Should writes allocate?
1268      *
1269      * @return return true if the cache should not allocate for writes.
1270      */
1271     bool allocate() const {
1272         return mode != WriteMode::NO_ALLOCATE;
1273     }
1274
1275     /**
1276      * Reset the write allocator state, meaning that it allocates for
1277      * writes and has not recorded any information about qualifying
1278      * writes that might trigger a switch to coalescing and later no
1279      * allocation.
1280      */
1281     void reset() {
1282         mode = WriteMode::ALLOCATE;
1283         byteCount = 0;
1284         nextAddr = 0;
1285     }
1286
1287     /**
1288      * Access whether we need to delay the current write.
1289      *
1290      * @param blk_addr The block address the packet writes to
1291      * @return true if the current packet should be delayed
1292      */
1293     bool delay(Addr blk_addr) {
1294         if (delayCtr[blk_addr] > 0) {
1295             --delayCtr[blk_addr];
1296             return true;
1297         } else {
1298             return false;
1299         }
1300     }
1301
1302     /**
1303      * Clear delay counter for the input block
1304      *
1305      * @param blk_addr The accessed cache block
1306      */
1307     void resetDelay(Addr blk_addr) {
1308         delayCtr.erase(blk_addr);
1309     }
1310
1311     /**
1312      * Update the write mode based on the current write
1313      * packet. This method compares the packet's address with any
1314      * current stream, and updates the tracking and the mode
1315      * accordingly.
1316      *
1317      * @param write_addr Start address of the write request
1318      * @param write_size Size of the write request
1319      * @param blk_addr The block address that this packet writes to
1320      */
1321     void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
1322
1323   private:
1324     /**
1325      * The current mode for write coalescing and allocation, either
1326      * normal operation (ALLOCATE), write coalescing (COALESCE), or
1327      * write coalescing without allocation (NO_ALLOCATE).
1328      */
1329     enum class WriteMode : char {
1330         ALLOCATE,
1331         COALESCE,
1332         NO_ALLOCATE,
1333     };
1334     WriteMode mode;
1335
1336     /** Address to match writes against to detect streams. */
1337     Addr nextAddr;
1338
1339     /**
1340      * Bytes written contiguously. Saturating once we no longer
1341      * allocate.
1342      */
1343     uint32_t byteCount;
1344
1345     /**
1346      * Limits for when to switch between the different write modes.
1347      */
1348     const uint32_t coalesceLimit;
1349     const uint32_t noAllocateLimit;
1350     /**
1351      * The number of times the allocator will delay an WriteReq MSHR.
1352      */
1353     const uint32_t delayThreshold;
1354
1355     /**
1356      * Keep track of the number of times the allocator has delayed an
1357      * WriteReq MSHR.
1358      */
1359     std::unordered_map<Addr, Counter> delayCtr;
1360 };
1361
1362 #endif //__MEM_CACHE_BASE_HH__