2 * Copyright (c) 2012-2013, 2015-2016, 2018-2019 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2003-2005 The Regents of The University of Michigan
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * Declares a basic cache interface BaseCache.
46 #ifndef __MEM_CACHE_BASE_HH__
47 #define __MEM_CACHE_BASE_HH__
53 #include "base/addr_range.hh"
54 #include "base/statistics.hh"
55 #include "base/trace.hh"
56 #include "base/types.hh"
57 #include "debug/Cache.hh"
58 #include "debug/CachePort.hh"
59 #include "enums/Clusivity.hh"
60 #include "mem/cache/cache_blk.hh"
61 #include "mem/cache/compressors/base.hh"
62 #include "mem/cache/mshr_queue.hh"
63 #include "mem/cache/tags/base.hh"
64 #include "mem/cache/write_queue.hh"
65 #include "mem/cache/write_queue_entry.hh"
66 #include "mem/packet.hh"
67 #include "mem/packet_queue.hh"
68 #include "mem/qport.hh"
69 #include "mem/request.hh"
70 #include "params/WriteAllocator.hh"
71 #include "sim/clocked_object.hh"
72 #include "sim/eventq.hh"
73 #include "sim/probe/probe.hh"
74 #include "sim/serialize.hh"
75 #include "sim/sim_exit.hh"
76 #include "sim/system.hh"
78 namespace Prefetcher {
84 struct BaseCacheParams;
87 * A basic cache interface. Implements some common functions for speed.
89 class BaseCache : public ClockedObject
93 * Indexes to enumerate the MSHR queues.
102 * Reasons for caches to be blocked.
105 Blocked_NoMSHRs = MSHRQueue_MSHRs,
106 Blocked_NoWBBuffers = MSHRQueue_WriteBuffer,
112 * A data contents update is composed of the updated block's address,
113 * the old contents, and the new contents.
118 /** The updated block's address. */
120 /** Whether the block belongs to the secure address space. */
122 /** The stale data contents. If zero-sized this update is a fill. */
123 std::vector<uint64_t> oldData;
124 /** The new data contents. If zero-sized this is an invalidation. */
125 std::vector<uint64_t> newData;
127 DataUpdate(Addr _addr, bool is_secure)
128 : addr(_addr), isSecure(is_secure), oldData(), newData()
136 * A cache request port is used for the memory-side port of the
137 * cache, and in addition to the basic timing port that only sends
138 * response packets through a transmit list, it also offers the
139 * ability to schedule and send request packets (requests &
140 * writebacks). The send event is scheduled through schedSendEvent,
141 * and the sendDeferredPacket of the timing port is modified to
142 * consider both the transmit list and the requests from the MSHR.
144 class CacheRequestPort : public QueuedRequestPort
150 * Schedule a send of a request packet (from the MSHR). Note
151 * that we could already have a retry outstanding.
153 void schedSendEvent(Tick time)
155 DPRINTF(CachePort, "Scheduling send event at %llu\n", time);
// Delegate to the request packet queue, which owns the actual
// send event and handles any outstanding retry.
156 reqQueue.schedSendEvent(time);
161 CacheRequestPort(const std::string &_name, BaseCache *_cache,
162 ReqPacketQueue &_reqQueue,
163 SnoopRespPacketQueue &_snoopRespQueue) :
164 QueuedRequestPort(_name, _cache, _reqQueue, _snoopRespQueue)
168 * Memory-side port always snoops.
170 * @return always true
172 virtual bool isSnooping() const { return true; }
176 * Override the default behaviour of sendDeferredPacket to enable
177 * the memory-side cache port to also send requests based on the
178 * current MSHR status. This queue has a pointer to our specific
179 * cache implementation and is used by the MemSidePort.
181 class CacheReqPacketQueue : public ReqPacketQueue
187 SnoopRespPacketQueue &snoopRespQueue;
191 CacheReqPacketQueue(BaseCache &cache, RequestPort &port,
192 SnoopRespPacketQueue &snoop_resp_queue,
193 const std::string &label) :
194 ReqPacketQueue(cache, port, label), cache(cache),
195 snoopRespQueue(snoop_resp_queue) { }
198 * Override the normal sendDeferredPacket and do not only
199 * consider the transmit list (used for responses), but also
202 virtual void sendDeferredPacket();
205 * Check if there is a conflicting snoop response about to be
206 * sent out, and if so simply stall any requests, and schedule
207 * a send event at the same time as the next snoop response is
210 * @param pkt The packet to check for conflicts against.
212 bool checkConflictingSnoop(const PacketPtr pkt)
// A deferred snoop response that overlaps this packet's cache
// line must go out first; stall the request if one is pending.
214 if (snoopRespQueue.checkConflict(pkt, cache.blkSize)) {
215 DPRINTF(CachePort, "Waiting for snoop response to be "
// Retry the request send at the time the conflicting snoop
// response becomes ready.
217 Tick when = snoopRespQueue.deferredPacketReadyTime();
218 schedSendEvent(when);
227 * The memory-side port extends the base cache request port with
228 * access functions for functional, atomic and timing snoops.
230 class MemSidePort : public CacheRequestPort
234 /** The cache-specific queue. */
235 CacheReqPacketQueue _reqQueue;
237 SnoopRespPacketQueue _snoopRespQueue;
239 // a pointer to our specific cache implementation
244 virtual void recvTimingSnoopReq(PacketPtr pkt);
246 virtual bool recvTimingResp(PacketPtr pkt);
248 virtual Tick recvAtomicSnoop(PacketPtr pkt);
250 virtual void recvFunctionalSnoop(PacketPtr pkt);
254 MemSidePort(const std::string &_name, BaseCache *_cache,
255 const std::string &_label);
259 * A cache response port is used for the CPU-side port of the cache,
260 * and it is basically a simple timing port that uses a transmit
261 * list for responses to the CPU (or connected requestor). In
262 * addition, it has the functionality to block the port for
263 * incoming requests. If blocked, the port will issue a retry once
266 class CacheResponsePort : public QueuedResponsePort
271 /** Do not accept any new requests. */
274 /** Return to normal operation and accept new requests. */
277 bool isBlocked() const { return blocked; }
281 CacheResponsePort(const std::string &_name, BaseCache *_cache,
282 const std::string &_label);
284 /** A normal packet queue used to store responses. */
285 RespPacketQueue queue;
293 void processSendRetry();
295 EventFunctionWrapper sendRetryEvent;
300 * The CPU-side port extends the base cache response port with access
301 * functions for functional, atomic and timing requests.
303 class CpuSidePort : public CacheResponsePort
307 // a pointer to our specific cache implementation
311 virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
313 virtual bool tryTiming(PacketPtr pkt) override;
315 virtual bool recvTimingReq(PacketPtr pkt) override;
317 virtual Tick recvAtomic(PacketPtr pkt) override;
319 virtual void recvFunctional(PacketPtr pkt) override;
321 virtual AddrRangeList getAddrRanges() const override;
325 CpuSidePort(const std::string &_name, BaseCache *_cache,
326 const std::string &_label);
330 CpuSidePort cpuSidePort;
331 MemSidePort memSidePort;
335 /** Miss status registers */
338 /** Write/writeback buffer */
339 WriteQueue writeBuffer;
341 /** Tag and data Storage */
344 /** Compression method being used. */
345 Compressor::Base* compressor;
348 Prefetcher::Base *prefetcher;
350 /** To probe when a cache hit occurs */
351 ProbePointArg<PacketPtr> *ppHit;
353 /** To probe when a cache miss occurs */
354 ProbePointArg<PacketPtr> *ppMiss;
356 /** To probe when a cache fill occurs */
357 ProbePointArg<PacketPtr> *ppFill;
360 * To probe when the contents of a block are updated. Content updates
361 * include data fills, overwrites, and invalidations, which means that
362 * this probe partially overlaps with other probes.
364 ProbePointArg<DataUpdate> *ppDataUpdate;
367 * The writeAllocator drives optimizations for streaming writes.
368 * It first determines whether a WriteReq MSHR should be delayed,
369 * thus ensuring that we wait longer in cases when we are write
370 * coalescing and allowing all the bytes of the line to be written
371 * before the MSHR packet is sent downstream. This works in unison
372 * with the tracking in the MSHR to check if the entire line is
373 * written. The write mode also affects the behaviour on filling
374 * any whole-line writes. Normally the cache allocates the line
375 * when receiving the InvalidateResp, but after seeing enough
376 * consecutive lines we switch to using the tempBlock, and thus
377 * end up not allocating the line, and instead turning the
378 * whole-line write into a writeback straight away.
380 WriteAllocator * const writeAllocator;
383 * Temporary cache block for occasional transitory use. We use
384 * the tempBlock to fill when allocation fails (e.g., when there
385 * is an outstanding request that accesses the victim block) or
386 * when we want to avoid allocation (e.g., exclusive caches)
388 TempCacheBlk *tempBlock;
391 * Upstream caches need this packet until true is returned, so
392 * hold it for deletion until a subsequent call
394 std::unique_ptr<Packet> pendingDelete;
397 * Mark a request as in service (sent downstream in the memory
398 * system), effectively making this MSHR the ordering point.
400 void markInService(MSHR *mshr, bool pending_modified_resp)
// Record fullness before marking, so we can detect the
// full -> not-full transition below.
402 bool wasFull = mshrQueue.isFull();
403 mshrQueue.markInService(mshr, pending_modified_resp);
// If marking this MSHR in service freed an entry, the cache can
// accept new misses again.
405 if (wasFull && !mshrQueue.isFull()) {
406 clearBlocked(Blocked_NoMSHRs);
410 void markInService(WriteQueueEntry *entry)
// Record fullness before marking, so we can detect the
// full -> not-full transition below.
412 bool wasFull = writeBuffer.isFull();
413 writeBuffer.markInService(entry);
// If servicing this entry freed a write buffer slot, unblock the
// cache for new writebacks.
415 if (wasFull && !writeBuffer.isFull()) {
416 clearBlocked(Blocked_NoWBBuffers);
421 * Determine whether we should allocate on a fill or not. If this
422 * cache is mostly inclusive with regards to the upstream cache(s)
423 * we always allocate (for any non-forwarded and cacheable
424 * requests). In the case of a mostly exclusive cache, we allocate
425 * on fill if the packet did not come from a cache, thus if we:
426 * are dealing with a whole-line write (the latter behaves much
427 * like a writeback), the original target packet came from a
428 * non-caching source, or if we are performing a prefetch or LLSC.
430 * @param cmd Command of the incoming requesting packet
431 * @return Whether we should allocate on the fill
433 inline bool allocOnFill(MemCmd cmd) const
// Mostly-inclusive caches always allocate on fill; otherwise
// allocate only for commands whose original request came from a
// non-caching source (see the comment block above for rationale).
435 return clusivity == Enums::mostly_incl ||
436 cmd == MemCmd::WriteLineReq ||
437 cmd == MemCmd::ReadReq ||
438 cmd == MemCmd::WriteReq ||
444 * Regenerate block address using tags.
445 * Block address regeneration depends on whether we're using a temporary
448 * @param blk The block to regenerate address.
449 * @return The block's address.
451 Addr regenerateBlkAddr(CacheBlk* blk);
454 * Calculate latency of accesses that only touch the tag array.
455 * @sa calculateAccessLatency
457 * @param delay The delay until the packet's metadata is present.
458 * @param lookup_lat Latency of the respective tag lookup.
459 * @return The number of ticks that pass due to a tag-only access.
461 Cycles calculateTagOnlyLatency(const uint32_t delay,
462 const Cycles lookup_lat) const;
464 * Calculate access latency in ticks given a tag lookup latency, and
465 * whether access was a hit or miss.
467 * @param blk The cache block that was accessed.
468 * @param delay The delay until the packet's metadata is present.
469 * @param lookup_lat Latency of the respective tag lookup.
470 * @return The number of ticks that pass due to a block access.
472 Cycles calculateAccessLatency(const CacheBlk* blk, const uint32_t delay,
473 const Cycles lookup_lat) const;
476 * Does all the processing necessary to perform the provided request.
477 * @param pkt The memory request to perform.
478 * @param blk The cache block to be updated.
479 * @param lat The latency of the access.
480 * @param writebacks List for any writebacks that need to be performed.
481 * @return Boolean indicating whether the request was satisfied.
483 virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
484 PacketList &writebacks);
487 * Handle a timing request that hit in the cache
489 * @param pkt The request packet
490 * @param blk The referenced block
491 * @param request_time The tick at which the block lookup is complete
493 virtual void handleTimingReqHit(PacketPtr pkt, CacheBlk *blk,
497 * Handle a timing request that missed in the cache
499 * Implementation specific handling for different cache
502 * @param pkt The request packet
503 * @param blk The referenced block
504 * @param forward_time The tick at which we can process dependent requests
505 * @param request_time The tick at which the block lookup is complete
507 virtual void handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk,
509 Tick request_time) = 0;
512 * Handle a timing request that missed in the cache
514 * Common functionality across different cache implementations
516 * @param pkt The request packet
517 * @param blk The referenced block
518 * @param mshr Any existing mshr for the referenced cache block
519 * @param forward_time The tick at which we can process dependent requests
520 * @param request_time The tick at which the block lookup is complete
522 void handleTimingReqMiss(PacketPtr pkt, MSHR *mshr, CacheBlk *blk,
523 Tick forward_time, Tick request_time);
526 * Performs the access specified by the request.
527 * @param pkt The request to perform.
529 virtual void recvTimingReq(PacketPtr pkt);
532 * Handling the special case of uncacheable write responses to
533 * make recvTimingResp less cluttered.
535 void handleUncacheableWriteResp(PacketPtr pkt);
538 * Service non-deferred MSHR targets using the received response
540 * Iterates through the list of targets that can be serviced with
541 * the current response.
543 * @param mshr The MSHR that corresponds to the response
544 * @param pkt The response packet
545 * @param blk The reference block
547 virtual void serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt,
551 * Handles a response (cache line fill/write ack) from the bus.
552 * @param pkt The response packet
554 virtual void recvTimingResp(PacketPtr pkt);
557 * Snoops bus transactions to maintain coherence.
558 * @param pkt The current bus transaction.
560 virtual void recvTimingSnoopReq(PacketPtr pkt) = 0;
563 * Handle a snoop response.
564 * @param pkt Snoop response packet
566 virtual void recvTimingSnoopResp(PacketPtr pkt) = 0;
569 * Handle a request in atomic mode that missed in this cache
571 * Creates a downstream request, sends it to the memory below and
572 * handles the response. As we are in atomic mode all operations
573 * are performed immediately.
575 * @param pkt The packet with the requests
576 * @param blk The referenced block
577 * @param writebacks A list with packets for any performed writebacks
578 * @return Cycles for handling the request
580 virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
581 PacketList &writebacks) = 0;
584 * Performs the access specified by the request.
585 * @param pkt The request to perform.
586 * @return The number of ticks required for the access.
588 virtual Tick recvAtomic(PacketPtr pkt);
591 * Snoop for the provided request in the cache and return the estimated
593 * @param pkt The memory request to snoop
594 * @return The number of ticks required for the snoop.
596 virtual Tick recvAtomicSnoop(PacketPtr pkt) = 0;
599 * Performs the access specified by the request.
601 * @param pkt The request to perform.
602 * @param from_cpu_side Whether the access comes from the CPU side port (true) or the memory side port (false)
604 virtual void functionalAccess(PacketPtr pkt, bool from_cpu_side);
607 * Update the data contents of a block. When no packet is provided no
608 * data will be written to the block, which means that this was likely
609 * triggered by an invalidation.
611 * @param blk The block being updated.
612 * @param cpkt The packet containing the new data.
613 * @param has_old_data Whether this block had data previously.
615 void updateBlockData(CacheBlk *blk, const PacketPtr cpkt,
619 * Handle doing the Compare and Swap function for SPARC.
621 void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
624 * Return the next queue entry to service, either a pending miss
625 * from the MSHR queue, a buffered write from the write buffer, or
626 * something from the prefetcher. This function is responsible
627 * for prioritizing among those sources on the fly.
629 QueueEntry* getNextQueueEntry();
632 * Insert writebacks into the write buffer
634 virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
637 * Send writebacks down the memory hierarchy in atomic mode
639 virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
642 * Create an appropriate downstream bus request packet.
644 * Creates a new packet with the request to be send to the memory
645 * below, or nullptr if the current request in cpu_pkt should just
648 * @param cpu_pkt The miss packet that needs to be satisfied.
649 * @param blk The referenced block, can be nullptr.
650 * @param needs_writable Indicates that the block must be writable
651 * even if the request in cpu_pkt doesn't indicate that.
652 * @param is_whole_line_write True if there are writes for the
654 * @return A packet send to the memory below
656 virtual PacketPtr createMissPacket(PacketPtr cpu_pkt, CacheBlk *blk,
658 bool is_whole_line_write) const = 0;
661 * Determine if clean lines should be written back or not. In
662 * cases where a downstream cache is mostly inclusive we likely
663 * want it to act as a victim cache also for lines that have not
664 * been modified. Hence, we cannot simply drop the line (or send a
665 * clean evict), but rather need to send the actual data.
667 const bool writebackClean;
670 * Writebacks from the tempBlock, resulting on the response path
671 * in atomic mode, must happen after the call to recvAtomic has
672 * finished (for the right ordering of the packets). We therefore
673 * need to hold on to the packets, and have a method and an event
676 PacketPtr tempBlockWriteback;
679 * Send the outstanding tempBlock writeback. To be called after
680 * recvAtomic finishes in cases where the block we filled is in
681 * fact the tempBlock, and now needs to be written back.
683 void writebackTempBlockAtomic() {
// There must be a writeback pending when this event fires.
684 assert(tempBlockWriteback != nullptr);
685 PacketList writebacks{tempBlockWriteback};
686 doWritebacksAtomic(writebacks);
// Clear the pending pointer so a new tempBlock writeback can be
// scheduled by a later recvAtomic.
687 tempBlockWriteback = nullptr;
691 * An event to writeback the tempBlock after recvAtomic
692 * finishes. To avoid other calls to recvAtomic getting in
693 * between, we create this event with a higher priority.
695 EventFunctionWrapper writebackTempBlockAtomicEvent;
698 * When a block is overwritten, its compression information must be updated,
699 * and it may need to be recompressed. If the compression size changes, the
700 * block may either become smaller, in which case there is no side effect,
701 * or bigger (data expansion; fat write), in which case the block might not
702 * fit in its current location anymore. If that happens, there are usually
703 * two options to be taken:
705 * - The co-allocated blocks must be evicted to make room for this block.
706 * Simpler, but ignores replacement data.
707 * - The block itself is moved elsewhere (used in policies where the CF
708 * determines the location of the block).
710 * This implementation uses the first approach.
712 * Notice that this is only called for writebacks, which means that L1
713 * caches (which see regular Writes), do not support compression.
716 * @param blk The block to be overwritten.
717 * @param data A pointer to the data to be compressed (blk's new data).
718 * @param writebacks List for any writebacks that need to be performed.
719 * @return Whether operation is successful or not.
721 bool updateCompressionData(CacheBlk *&blk, const uint64_t* data,
722 PacketList &writebacks);
725 * Perform any necessary updates to the block and perform any data
726 * exchange between the packet and the block. The flags of the
727 * packet are also set accordingly.
729 * @param pkt Request packet from upstream that hit a block
730 * @param blk Cache block that the packet hit
731 * @param deferred_response Whether this request originally missed
732 * @param pending_downgrade Whether the writable flag is to be removed
734 virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
735 bool deferred_response = false,
736 bool pending_downgrade = false);
739 * Maintain the clusivity of this cache by potentially
740 * invalidating a block. This method works in conjunction with
741 * satisfyRequest, but is separate to allow us to handle all MSHR
742 * targets before potentially dropping a block.
744 * @param from_cache Whether we have dealt with a packet from a cache
745 * @param blk The block that should potentially be dropped
747 void maintainClusivity(bool from_cache, CacheBlk *blk);
750 * Try to evict the given blocks. If any of them is a transient eviction,
751 * that is, the block is present in the MSHR queue all evictions are
752 * cancelled since handling such cases has not been implemented.
754 * @param evict_blks Blocks marked for eviction.
755 * @param writebacks List for any writebacks that need to be performed.
756 * @return False if any of the evicted blocks is in transient state.
758 bool handleEvictions(std::vector<CacheBlk*> &evict_blks,
759 PacketList &writebacks);
762 * Handle a fill operation caused by a received packet.
764 * Populates a cache block and handles all outstanding requests for the
765 * satisfied fill request. This version takes two memory requests. One
766 * contains the fill data, the other is an optional target to satisfy.
767 * Note that the reason we return a list of writebacks rather than
768 * inserting them directly in the write buffer is that this function
769 * is called by both atomic and timing-mode accesses, and in atomic
770 * mode we don't mess with the write buffer (we just perform the
771 * writebacks atomically once the original request is complete).
773 * @param pkt The memory request with the fill data.
774 * @param blk The cache block if it already exists.
775 * @param writebacks List for any writebacks that need to be performed.
776 * @param allocate Whether to allocate a block or use the temp block
777 * @return Pointer to the new cache block.
779 CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
780 PacketList &writebacks, bool allocate);
783 * Allocate a new block and perform any necessary writebacks
785 * Find a victim block and if necessary prepare writebacks for any
786 * existing data. May return nullptr if there are no replaceable
787 * blocks. If a replaceable block is found, it inserts the new block in
788 * its place. The new block, however, is not set as valid yet.
790 * @param pkt Packet holding the address to update
791 * @param writebacks A list of writeback packets for the evicted blocks
792 * @return the allocated block
794 CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
796 * Evict a cache block.
798 * Performs a writeback if necessary and invalidates the block
800 * @param blk Block to invalidate
801 * @return A packet with the writeback, can be nullptr
803 M5_NODISCARD virtual PacketPtr evictBlock(CacheBlk *blk) = 0;
806 * Evict a cache block.
808 * Performs a writeback if necessary and invalidates the block
810 * @param blk Block to invalidate
811 * @param writebacks Return a list of packets with writebacks
813 void evictBlock(CacheBlk *blk, PacketList &writebacks);
816 * Invalidate a cache block.
818 * @param blk Block to invalidate
820 void invalidateBlock(CacheBlk *blk);
823 * Create a writeback request for the given block.
825 * @param blk The block to writeback.
826 * @return The writeback request for the block.
828 PacketPtr writebackBlk(CacheBlk *blk);
831 * Create a writeclean request for the given block.
833 * Creates a request that writes the block to the cache below
834 * without evicting the block from the current cache.
836 * @param blk The block to write clean.
837 * @param dest The destination of the write clean operation.
838 * @param id Use the given packet id for the write clean operation.
839 * @return The generated write clean packet.
841 PacketPtr writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id);
844 * Write back dirty blocks in the cache using functional accesses.
846 virtual void memWriteback() override;
849 * Invalidates all blocks in the cache.
851 * @warn Dirty cache lines will not be written back to
852 * memory. Make sure to call memWriteback() first if you
853 * want them written to memory.
855 virtual void memInvalidate() override;
858 * Determine if there are any dirty blocks in the cache.
860 * @return true if at least one block is dirty, false otherwise.
862 bool isDirty() const;
865 * Determine if an address is in the ranges covered by this
866 * cache. This is useful to filter snoops.
868 * @param addr Address to check against
870 * @return If the address in question is in range
872 bool inRange(Addr addr) const;
875 * Find next request ready time from among possible sources.
877 Tick nextQueueReadyTime() const;
879 /** Block size of this cache */
880 const unsigned blkSize;
883 * The latency of tag lookup of a cache. It occurs when there is
884 * an access to the cache.
886 const Cycles lookupLatency;
889 * The latency of data access of a cache. It occurs when there is
890 * an access to the cache.
892 const Cycles dataLatency;
895 * This is the forward latency of the cache. It occurs when there
896 * is a cache miss and a request is forwarded downstream, in
897 * particular an outbound miss.
899 const Cycles forwardLatency;
901 /** The latency to fill a cache block */
902 const Cycles fillLatency;
905 * The latency of sending response to its upper level cache/core on
906 * a linefill. The responseLatency parameter captures this
909 const Cycles responseLatency;
912 * Whether tags and data are accessed sequentially.
914 const bool sequentialAccess;
916 /** The number of targets for each MSHR. */
919 /** Do we forward snoops from mem side port through to cpu side port? */
923 * Clusivity with respect to the upstream cache, determining if we
924 * fill into both this cache and the cache above on a miss. Note
925 * that we currently do not support strict clusivity policies.
927 const Enums::Clusivity clusivity;
930 * Is this cache read only, for example the instruction cache, or
931 * table-walker cache. A cache that is read only should never see
932 * any writes, and should never get any dirty data (and hence
933 * never have to do any writebacks).
935 const bool isReadOnly;
938 * when a data expansion of a compressed block happens it will not be
939 * able to co-allocate where it is at anymore. If true, the replacement
940 * policy is called to chose a new location for the block. Otherwise,
941 * all co-allocated blocks are evicted.
943 const bool replaceExpansions;
946 * Similar to data expansions, after a block improves its compression,
947 * it may need to be moved elsewhere compatible with the new compression
948 * factor, or, if not required by the compaction method, it may be moved
949 * to co-allocate with an existing block and thus free an entry.
951 const bool moveContractions;
954 * Bit vector of the blocking reasons for the access path.
959 /** Increasing order number assigned to each incoming request. */
962 /** Stores time the cache blocked for statistics. */
965 /** Pointer to the MSHR that has no targets. */
968 /** The number of misses to trigger an exit event. */
972 * The address range to which the cache responds on the CPU side.
973 * Normally this is all possible memory addresses. */
974 const AddrRangeList addrRanges;
977 /** System we are currently operating in. */
980 struct CacheCmdStats : public Stats::Group
982 CacheCmdStats(BaseCache &c, const std::string &name);
985 * Callback to register stats from parent
986 * CacheStats::regStats(). We can't use the normal flow since
987 * there is no guaranteed order and CacheStats::regStats()
988 * needs to rely on these stats being initialised.
990 void regStatsFromParent();
992 const BaseCache &cache;
994 /** Number of hits per thread for each type of command.
995 @sa Packet::Command */
997 /** Number of misses per thread for each type of command.
998 @sa Packet::Command */
999 Stats::Vector misses;
1001 * Total number of cycles per thread/command spent waiting for a miss.
1002 * Used to calculate the average miss latency.
1004 Stats::Vector missLatency;
1005 /** The number of accesses per command and thread. */
1006 Stats::Formula accesses;
1007 /** The miss rate per command and thread. */
1008 Stats::Formula missRate;
1009 /** The average miss latency per command and thread. */
1010 Stats::Formula avgMissLatency;
1011 /** Number of misses that hit in the MSHRs per command and thread. */
1012 Stats::Vector mshrHits;
1013 /** Number of misses that miss in the MSHRs, per command and thread. */
1014 Stats::Vector mshrMisses;
1015 /** Number of misses that miss in the MSHRs, per command and thread. */
1016 Stats::Vector mshrUncacheable;
1017 /** Total cycle latency of each MSHR miss, per command and thread. */
1018 Stats::Vector mshrMissLatency;
1019 /** Total cycle latency of each MSHR miss, per command and thread. */
1020 Stats::Vector mshrUncacheableLatency;
1021 /** The miss rate in the MSHRs pre command and thread. */
1022 Stats::Formula mshrMissRate;
1023 /** The average latency of an MSHR miss, per command and thread. */
1024 Stats::Formula avgMshrMissLatency;
1025 /** The average latency of an MSHR miss, per command and thread. */
1026 Stats::Formula avgMshrUncacheableLatency;
1029 struct CacheStats : public Stats::Group
1031 CacheStats(BaseCache &c);
1033 void regStats() override;
// Look up the per-command statistics group for this packet,
// indexed by the packet's command.
1035 CacheCmdStats &cmdStats(const PacketPtr p) {
1036 return *cmd[p->cmdToIndex()];
1039 const BaseCache &cache;
1041 /** Number of hits for demand accesses. */
1042 Stats::Formula demandHits;
1043 /** Number of hit for all accesses. */
1044 Stats::Formula overallHits;
1046 /** Number of misses for demand accesses. */
1047 Stats::Formula demandMisses;
1048 /** Number of misses for all accesses. */
1049 Stats::Formula overallMisses;
1051 /** Total number of cycles spent waiting for demand misses. */
1052 Stats::Formula demandMissLatency;
1053 /** Total number of cycles spent waiting for all misses. */
1054 Stats::Formula overallMissLatency;
1056 /** The number of demand accesses. */
1057 Stats::Formula demandAccesses;
1058 /** The number of overall accesses. */
1059 Stats::Formula overallAccesses;
1061 /** The miss rate of all demand accesses. */
1062 Stats::Formula demandMissRate;
1063 /** The miss rate for all accesses. */
1064 Stats::Formula overallMissRate;
1066 /** The average miss latency for demand misses. */
1067 Stats::Formula demandAvgMissLatency;
1068 /** The average miss latency for all misses. */
1069 Stats::Formula overallAvgMissLatency;
1071 /** The total number of cycles blocked for each blocked cause. */
1072 Stats::Vector blockedCycles;
1073 /** The number of times this cache blocked for each blocked cause. */
1074 Stats::Vector blockedCauses;
1076 /** The average number of cycles blocked for each blocked cause. */
1077 Stats::Formula avgBlocked;
1079 /** The number of times a HW-prefetched block is evicted w/o
1081 Stats::Scalar unusedPrefetches;
1083 /** Number of blocks written back per thread. */
1084 Stats::Vector writebacks;
1086 /** Demand misses that hit in the MSHRs. */
1087 Stats::Formula demandMshrHits;
1088 /** Total number of misses that hit in the MSHRs. */
1089 Stats::Formula overallMshrHits;
1091 /** Demand misses that miss in the MSHRs. */
1092 Stats::Formula demandMshrMisses;
1093 /** Total number of misses that miss in the MSHRs. */
1094 Stats::Formula overallMshrMisses;
1096 /** Total number of misses that miss in the MSHRs. */
1097 Stats::Formula overallMshrUncacheable;
1099 /** Total cycle latency of demand MSHR misses. */
1100 Stats::Formula demandMshrMissLatency;
1101 /** Total cycle latency of overall MSHR misses. */
1102 Stats::Formula overallMshrMissLatency;
1104 /** Total cycle latency of overall MSHR misses. */
1105 Stats::Formula overallMshrUncacheableLatency;
1107 /** The demand miss rate in the MSHRs. */
1108 Stats::Formula demandMshrMissRate;
1109 /** The overall miss rate in the MSHRs. */
1110 Stats::Formula overallMshrMissRate;
1112 /** The average latency of a demand MSHR miss. */
1113 Stats::Formula demandAvgMshrMissLatency;
1114 /** The average overall latency of an MSHR miss. */
1115 Stats::Formula overallAvgMshrMissLatency;
1117 /** The average overall latency of an MSHR miss. */
1118 Stats::Formula overallAvgMshrUncacheableLatency;
1120 /** Number of replacements of valid blocks. */
1121 Stats::Scalar replacements;
1123 /** Number of data expansions. */
1124 Stats::Scalar dataExpansions;
1127 * Number of data contractions (blocks that had their compression
1130 Stats::Scalar dataContractions;
1132 /** Per-command statistics */
1133 std::vector<std::unique_ptr<CacheCmdStats>> cmd;
1136 /** Registers probes. */
1137 void regProbePoints() override;
/** Construct the base cache from the given parameters and block size. */
1140 BaseCache(const BaseCacheParams &p, unsigned blk_size);
1143 void init() override;
1145 Port &getPort(const std::string &if_name,
1146 PortID idx=InvalidPortID) override;
1149 /** Query block size of a cache.
1150 * @return The block size */
1153 getBlockSize() const
/** @return the address ranges this cache responds to. */
1158 const AddrRangeList &getAddrRanges() const { return addrRanges; }
/**
 * Allocate a miss buffer (MSHR) for @p pkt's block, and block the
 * cache when the MSHR queue becomes full. When @p sched_send is set,
 * a memory-side send event is scheduled at @p time.
 *
 * @param pkt Packet that needs a miss buffer entry.
 * @param time Tick at which the downstream send may be attempted.
 * @param sched_send Whether to schedule the memory-side send event.
 * @return Pointer to the allocated MSHR.
 */
1160 MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool sched_send = true)
1162 MSHR *mshr = mshrQueue.allocate(pkt->getBlockAddr(blkSize), blkSize,
1164 allocOnFill(pkt->cmd));
// The queue is now full: stop accepting requests until an MSHR frees up.
1166 if (mshrQueue.isFull()) {
1167 setBlocked((BlockedCause)MSHRQueue_MSHRs);
1171 // schedule the send
1172 schedMemSideSendEvent(time);
/**
 * Allocate a write-buffer entry for a write or CleanEvict packet,
 * blocking the cache if the write buffer becomes full, and schedule
 * the memory-side port to send at @p time.
 *
 * @param pkt Write or CleanEvict packet to buffer.
 * @param time Tick at which the downstream send may be attempted.
 */
1178 void allocateWriteBuffer(PacketPtr pkt, Tick time)
1180 // should only see writes or clean evicts here
1181 assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);
1183 Addr blk_addr = pkt->getBlockAddr(blkSize);
1185 // If using compression, on evictions the block is decompressed and
1186 // the operation's latency is added to the payload delay. Consume
1187 // that payload delay here, meaning that the data is always stored
1188 // uncompressed in the writebuffer
1190 time += pkt->payloadDelay;
1191 pkt->payloadDelay = 0;
// Look for an existing not-yet-in-service entry for the same block
// that this writeback could merge with.
1194 WriteQueueEntry *wq_entry =
1195 writeBuffer.findMatch(blk_addr, pkt->isSecure());
1196 if (wq_entry && !wq_entry->inService) {
1197 DPRINTF(Cache, "Potential to merge writeback %s", pkt->print());
1200 writeBuffer.allocate(blk_addr, blkSize, pkt, time, order++);
// Entry consumed the last free slot: block new accesses.
1202 if (writeBuffer.isFull()) {
1203 setBlocked((BlockedCause)MSHRQueue_WriteBuffer);
1206 // schedule the send
1207 schedMemSideSendEvent(time);
1211 /** Returns true if the cache is blocked for accesses. */
1213 bool isBlocked() const
1215 return blocked != 0;
1219 /** Marks the access path of the cache as blocked for the given cause. This
1220 * also sets the blocked flag in the response interface.
1221 * @param cause The reason for the cache blocking. */
1223 void setBlocked(BlockedCause cause)
1225 uint8_t flag = 1 << cause;
1227 stats.blockedCauses[cause]++;
// Remember when blocking started; clearBlocked() uses this to
// accumulate blockedCycles for this cause.
1228 blockedCycle = curCycle();
1229 cpuSidePort.setBlocked();
1232 DPRINTF(Cache,"Blocking for cause %d, mask=%d\n", cause, blocked);
1236 /** Marks the cache as unblocked for the given cause. This also clears the
1237 * blocked flags in the appropriate interfaces.
1238 * @param cause The newly unblocked cause.
1239 * @warning Calling this function can cause a blocked request on the bus to
1240 * access the cache. The cache must be in a state to handle that request. */
1242 void clearBlocked(BlockedCause cause)
1244 uint8_t flag = 1 << cause;
1246 DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
1248 stats.blockedCycles[cause] += curCycle() - blockedCycle;
1249 cpuSidePort.clearBlocked();
1254 /** Schedule a send event for the memory-side port. If already
1255 * scheduled, this may reschedule the event at an earlier
1256 * time. When the specified time is reached, the port is free to
1257 * send either a response, a request, or a prefetch request.
1259 * @param time The time when to attempt sending a packet. */
1261 void schedMemSideSendEvent(Tick time)
1263 memSidePort.schedSendEvent(time);
/** @return true if a block for @p addr (in the given secure space) is in the tags. */
1266 bool inCache(Addr addr, bool is_secure) const {
1267 return tags->findBlock(addr, is_secure);
/** @return whether the block for @p addr was brought in by a prefetch.
 * NOTE(review): the null check of the looked-up block is not visible
 * in this view — confirm before relying on it. */
1270 bool hasBeenPrefetched(Addr addr, bool is_secure) const {
1271 CacheBlk *block = tags->findBlock(addr, is_secure);
1273 return block->wasPrefetched();
/** @return true if an MSHR matching @p addr exists in the miss queue. */
1279 bool inMissQueue(Addr addr, bool is_secure) const {
1280 return mshrQueue.findMatch(addr, is_secure);
/** Update the per-command/per-requestor miss statistics for @p pkt and
 * bump the packet's recorded access depth. */
1283 void incMissCount(PacketPtr pkt)
1285 assert(pkt->req->requestorId() < system->maxRequestors());
1286 stats.cmdStats(pkt).misses[pkt->req->requestorId()]++;
1287 pkt->req->incAccessDepth();
// NOTE(review): presumably guarded by a configured maximum miss
// count; the guard condition is not visible in this view — confirm.
1291 exitSimLoop("A cache reached the maximum miss count");
/** Update the per-command/per-requestor hit statistics for @p pkt. */
1294 void incHitCount(PacketPtr pkt)
1296 assert(pkt->req->requestorId() < system->maxRequestors());
1297 stats.cmdStats(pkt).hits[pkt->req->requestorId()]++;
1301 /** Checks if the cache is coalescing writes.
1303 * @return True if the cache is coalescing writes */
1305 bool coalesce() const;
1309 /** Cache block visitor that writes back dirty cache blocks using
1310 * functional writes. */
1312 void writebackVisitor(CacheBlk &blk);
1315 /** Cache block visitor that invalidates all blocks in the cache.
1317 * @warn Dirty cache lines will not be written back to memory. */
1319 void invalidateVisitor(CacheBlk &blk);
1322 /** Take an MSHR, turn it into a suitable downstream packet, and
1323 * send it out. This construct allows a queue entry to choose a suitable
1324 * approach based on its type.
1326 * @param mshr The MSHR to turn into a packet and send
1327 * @return True if the port is waiting for a retry */
1329 virtual bool sendMSHRQueuePacket(MSHR* mshr);
1332 /** Similar to sendMSHR, but for a write-queue entry
1333 * instead. Create the packet, and send it, and if successful also
1334 * mark the entry in service.
1336 * @param wq_entry The write-queue entry to turn into a packet and send
1337 * @return True if the port is waiting for a retry */
1339 bool sendWriteQueuePacket(WriteQueueEntry* wq_entry);
1342 /** Serialize the state of the caches.
1344 * We currently don't support checkpointing cache state, so this panics. */
1346 void serialize(CheckpointOut &cp) const override;
1347 void unserialize(CheckpointIn &cp) override;
1351 /** The write allocator inspects write packets and detects streaming
1352 * patterns. The write allocator supports a single stream where writes
1353 * are expected to access consecutive locations and keeps track of
1354 * size of the area covered by the consecutive writes in byteCount.
1356 * 1) When byteCount has surpassed the coalesceLimit the mode
1357 * switches from ALLOCATE to COALESCE where writes should be delayed
1358 * until the whole block is written at which point a single packet
1359 * (whole line write) can service them.
1361 * 2) When byteCount has also exceeded the noAllocateLimit (whole
1362 * line) we switch to NO_ALLOCATE when writes should not allocate in
1363 * the cache but rather send a whole line write to the memory below. */
1365 class WriteAllocator : public SimObject {
/** Limits are configured in units of blocks and scaled to bytes here. */
1367 WriteAllocator(const WriteAllocatorParams &p) :
1369 coalesceLimit(p.coalesce_limit * p.block_size),
1370 noAllocateLimit(p.no_allocate_limit * p.block_size),
1371 delayThreshold(p.delay_threshold)
1377 /** Should writes be coalesced? This is true if the mode is set to
1380 * @return return true if the cache should coalesce writes. */
1382 bool coalesce() const {
1383 return mode != WriteMode::ALLOCATE;
1387 /** Should writes allocate?
1389 * @return return true if the cache should not allocate for writes. */
1391 bool allocate() const {
1392 return mode != WriteMode::NO_ALLOCATE;
1396 /** Reset the write allocator state, meaning that it allocates for
1397 * writes and has not recorded any information about qualifying
1398 * writes that might trigger a switch to coalescing and later no
 * allocation. */
1402 mode = WriteMode::ALLOCATE;
1408 /** Access whether we need to delay the current write.
1410 * @param blk_addr The block address the packet writes to
1411 * @return true if the current packet should be delayed */
1413 bool delay(Addr blk_addr) {
1414 if (delayCtr[blk_addr] > 0) {
1415 --delayCtr[blk_addr];
1423 /** Clear delay counter for the input block.
1425 * @param blk_addr The accessed cache block */
1427 void resetDelay(Addr blk_addr) {
1428 delayCtr.erase(blk_addr);
1432 /** Update the write mode based on the current write
1433 * packet. This method compares the packet's address with any
1434 * current stream, and updates the tracking and the mode
1437 * @param write_addr Start address of the write request
1438 * @param write_size Size of the write request
1439 * @param blk_addr The block address that this packet writes to */
1441 void updateMode(Addr write_addr, unsigned write_size, Addr blk_addr);
1445 /** The current mode for write coalescing and allocation, either
1446 * normal operation (ALLOCATE), write coalescing (COALESCE), or
1447 * write coalescing without allocation (NO_ALLOCATE). */
1449 enum class WriteMode : char {
1456 /** Address to match writes against to detect streams. */
1460 /** Bytes written contiguously; saturates once the current write
 * stream is no longer matched. */
1466 /** Limits for when to switch between the different write modes. */
1468 const uint32_t coalesceLimit;
1469 const uint32_t noAllocateLimit;
1471 /** The number of times the allocator will delay a WriteReq MSHR. */
1473 const uint32_t delayThreshold;
1476 /** Keep track of the number of times the allocator has delayed an
 * allocation, per block address. */
1479 std::unordered_map<Addr, Counter> delayCtr;
1482 #endif //__MEM_CACHE_BASE_HH__