mem-cache: Use shouldAllocate() instead of CPack's decompress()

[gem5.git] / src / mem / cache / base.hh
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh

index bf190a5912ecea260590fa204ad3c918181c5650..cd467c8ad6bb685f4fafc8636e50751966bfa3c1 100644 (file)
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2012-2013, 2015-2016, 2018 ARM Limited
+ * Copyright (c) 2012-2013, 2015-2016, 2018-2019 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -81,9 +81,7 @@
  #include "sim/sim_exit.hh"
  #include "sim/system.hh"
  
-class BaseMasterPort;
  class BasePrefetcher;
-class BaseSlavePort;
  class MSHR;
  class MasterPort;
  class QueueEntry;
@@ -454,9 +452,11 @@ class BaseCache : public ClockedObject
       * @param pkt The memory request to perform.
       * @param blk The cache block to be updated.
       * @param lat The latency of the access.
+     * @param writebacks List for any writebacks that need to be performed.
       * @return Boolean indicating whether the request was satisfied.
       */
-    virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat);
+    virtual bool access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
+                        PacketList &writebacks);
  
      /*
       * Handle a timing request that hit in the cache
@@ -549,9 +549,11 @@ class BaseCache : public ClockedObject
       *
       * @param pkt The packet with the requests
       * @param blk The referenced block
+     * @param writebacks A list with packets for any performed writebacks
       * @return Cycles for handling the request
       */
-    virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk) = 0;
+    virtual Cycles handleAtomicReqMiss(PacketPtr pkt, CacheBlk *&blk,
+                                       PacketList &writebacks) = 0;
  
      /**
       * Performs the access specified by the request.
@@ -591,18 +593,13 @@ class BaseCache : public ClockedObject
  
      /**
       * Insert writebacks into the write buffer
-     *
-     * @param pkt The writeback packet.
-     * @param forward_time Tick to which the writeback should be scheduled.
       */
-    virtual void doWritebacks(PacketPtr pkt, Tick forward_time) = 0;
+    virtual void doWritebacks(PacketList& writebacks, Tick forward_time) = 0;
  
      /**
-     * Send writebacks down the memory hierarchy in atomic mode.
-     *
-     * @param pkt The writeback packet.
+     * Send writebacks down the memory hierarchy in atomic mode
       */
-    virtual void doWritebacksAtomic(PacketPtr pkt) = 0;
+    virtual void doWritebacksAtomic(PacketList& writebacks) = 0;
  
      /**
       * Create an appropriate downstream bus request packet.
@@ -648,7 +645,8 @@ class BaseCache : public ClockedObject
       */
      void writebackTempBlockAtomic() {
          assert(tempBlockWriteback != nullptr);
-        doWritebacksAtomic(tempBlockWriteback);
+        PacketList writebacks{tempBlockWriteback};
+        doWritebacksAtomic(writebacks);
          tempBlockWriteback = nullptr;
      }
  
@@ -680,12 +678,11 @@ class BaseCache : public ClockedObject
       *
       * @param blk The block to be overwriten.
       * @param data A pointer to the data to be compressed (blk's new data).
-     * @param delay The delay until the packet's metadata is present.
-     * @param tag_latency Latency to access the tags of the replacement victim.
+     * @param writebacks List for any writebacks that need to be performed.
       * @return Whether operation is successful or not.
       */
      bool updateCompressionData(CacheBlk *blk, const uint64_t* data,
-        uint32_t delay, Cycles tag_latency);
+                               PacketList &writebacks);
  
      /**
       * Perform any necessary updates to the block and perform any data
@@ -718,27 +715,34 @@ class BaseCache : public ClockedObject
       * Populates a cache block and handles all outstanding requests for the
       * satisfied fill request. This version takes two memory requests. One
       * contains the fill data, the other is an optional target to satisfy.
+     * Note that the reason we return a list of writebacks rather than
+     * inserting them directly in the write buffer is that this function
+     * is called by both atomic and timing-mode accesses, and in atomic
+     * mode we don't mess with the write buffer (we just perform the
+     * writebacks atomically once the original request is complete).
       *
       * @param pkt The memory request with the fill data.
       * @param blk The cache block if it already exists.
+     * @param writebacks List for any writebacks that need to be performed.
       * @param allocate Whether to allocate a block or use the temp block
       * @return Pointer to the new cache block.
       */
-    CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk, bool allocate);
+    CacheBlk *handleFill(PacketPtr pkt, CacheBlk *blk,
+                         PacketList &writebacks, bool allocate);
  
      /**
-     * Allocate a new block for the packet's data. The victim block might be
-     * valid, and thus the necessary writebacks are done. May return nullptr
-     * if there are no replaceable blocks. If a replaceable block is found,
-     * it inserts the new block in its place. The new block, however, is not
-     * set as valid yet.
+     * Allocate a new block and perform any necessary writebacks
+     *
+     * Find a victim block and if necessary prepare writebacks for any
+     * existing data. May return nullptr if there are no replaceable
+     * blocks. If a replaceable block is found, it inserts the new block in
+     * its place. The new block, however, is not set as valid yet.
       *
       * @param pkt Packet holding the address to update
-     * @param tag_latency Latency to access the tags of the replacement victim.
+     * @param writebacks A list of writeback packets for the evicted blocks
       * @return the allocated block
       */
-    CacheBlk *allocateBlock(const PacketPtr pkt, Cycles tag_latency);
-
+    CacheBlk *allocateBlock(const PacketPtr pkt, PacketList &writebacks);
      /**
       * Evict a cache block.
       *
@@ -755,10 +759,9 @@ class BaseCache : public ClockedObject
       * Performs a writeback if necesssary and invalidates the block
       *
       * @param blk Block to invalidate
-     * @param forward_time Tick to which the writeback should be scheduled if
-     *                     in timing mode.
+     * @param writebacks Return a list of packets with writebacks
       */
-    void evictBlock(CacheBlk *blk, Tick forward_time);
+    void evictBlock(CacheBlk *blk, PacketList &writebacks);
  
      /**
       * Invalidate a cache block.
@@ -909,146 +912,155 @@ class BaseCache : public ClockedObject
      /** System we are currently operating in. */
      System *system;
  
-    // Statistics
-    /**
-     * @addtogroup CacheStatistics
-     * @{
-     */
-
-    /** Number of hits per thread for each type of command.
-        @sa Packet::Command */
-    Stats::Vector hits[MemCmd::NUM_MEM_CMDS];
-    /** Number of hits for demand accesses. */
-    Stats::Formula demandHits;
-    /** Number of hit for all accesses. */
-    Stats::Formula overallHits;
-
-    /** Number of misses per thread for each type of command.
-        @sa Packet::Command */
-    Stats::Vector misses[MemCmd::NUM_MEM_CMDS];
-    /** Number of misses for demand accesses. */
-    Stats::Formula demandMisses;
-    /** Number of misses for all accesses. */
-    Stats::Formula overallMisses;
-
-    /**
-     * Total number of cycles per thread/command spent waiting for a miss.
-     * Used to calculate the average miss latency.
-     */
-    Stats::Vector missLatency[MemCmd::NUM_MEM_CMDS];
-    /** Total number of cycles spent waiting for demand misses. */
-    Stats::Formula demandMissLatency;
-    /** Total number of cycles spent waiting for all misses. */
-    Stats::Formula overallMissLatency;
-
-    /** The number of accesses per command and thread. */
-    Stats::Formula accesses[MemCmd::NUM_MEM_CMDS];
-    /** The number of demand accesses. */
-    Stats::Formula demandAccesses;
-    /** The number of overall accesses. */
-    Stats::Formula overallAccesses;
-
-    /** The miss rate per command and thread. */
-    Stats::Formula missRate[MemCmd::NUM_MEM_CMDS];
-    /** The miss rate of all demand accesses. */
-    Stats::Formula demandMissRate;
-    /** The miss rate for all accesses. */
-    Stats::Formula overallMissRate;
-
-    /** The average miss latency per command and thread. */
-    Stats::Formula avgMissLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average miss latency for demand misses. */
-    Stats::Formula demandAvgMissLatency;
-    /** The average miss latency for all misses. */
-    Stats::Formula overallAvgMissLatency;
-
-    /** The total number of cycles blocked for each blocked cause. */
-    Stats::Vector blocked_cycles;
-    /** The number of times this cache blocked for each blocked cause. */
-    Stats::Vector blocked_causes;
-
-    /** The average number of cycles blocked for each blocked cause. */
-    Stats::Formula avg_blocked;
-
-    /** The number of times a HW-prefetched block is evicted w/o reference. */
-    Stats::Scalar unusedPrefetches;
-
-    /** Number of blocks written back per thread. */
-    Stats::Vector writebacks;
-
-    /** Number of misses that hit in the MSHRs per command and thread. */
-    Stats::Vector mshr_hits[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that hit in the MSHRs. */
-    Stats::Formula demandMshrHits;
-    /** Total number of misses that hit in the MSHRs. */
-    Stats::Formula overallMshrHits;
-
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector mshr_misses[MemCmd::NUM_MEM_CMDS];
-    /** Demand misses that miss in the MSHRs. */
-    Stats::Formula demandMshrMisses;
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrMisses;
-
-    /** Number of misses that miss in the MSHRs, per command and thread. */
-    Stats::Vector mshr_uncacheable[MemCmd::NUM_MEM_CMDS];
-    /** Total number of misses that miss in the MSHRs. */
-    Stats::Formula overallMshrUncacheable;
-
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector mshr_miss_latency[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of demand MSHR misses. */
-    Stats::Formula demandMshrMissLatency;
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrMissLatency;
-
-    /** Total cycle latency of each MSHR miss, per command and thread. */
-    Stats::Vector mshr_uncacheable_lat[MemCmd::NUM_MEM_CMDS];
-    /** Total cycle latency of overall MSHR misses. */
-    Stats::Formula overallMshrUncacheableLatency;
-
-#if 0
-    /** The total number of MSHR accesses per command and thread. */
-    Stats::Formula mshrAccesses[MemCmd::NUM_MEM_CMDS];
-    /** The total number of demand MSHR accesses. */
-    Stats::Formula demandMshrAccesses;
-    /** The total number of MSHR accesses. */
-    Stats::Formula overallMshrAccesses;
-#endif
-
-    /** The miss rate in the MSHRs pre command and thread. */
-    Stats::Formula mshrMissRate[MemCmd::NUM_MEM_CMDS];
-    /** The demand miss rate in the MSHRs. */
-    Stats::Formula demandMshrMissRate;
-    /** The overall miss rate in the MSHRs. */
-    Stats::Formula overallMshrMissRate;
-
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrMissLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average latency of a demand MSHR miss. */
-    Stats::Formula demandAvgMshrMissLatency;
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrMissLatency;
-
-    /** The average latency of an MSHR miss, per command and thread. */
-    Stats::Formula avgMshrUncacheableLatency[MemCmd::NUM_MEM_CMDS];
-    /** The average overall latency of an MSHR miss. */
-    Stats::Formula overallAvgMshrUncacheableLatency;
-
-    /** Number of replacements of valid blocks. */
-    Stats::Scalar replacements;
-
-    /** Number of data expansions. */
-    Stats::Scalar dataExpansions;
-
-    /**
-     * @}
-     */
-
-    /**
-     * Register stats for this object.
-     */
-    void regStats() override;
+    struct CacheCmdStats : public Stats::Group
+    {
+        CacheCmdStats(BaseCache &c, const std::string &name);
+
+        /**
+         * Callback to register stats from parent
+         * CacheStats::regStats(). We can't use the normal flow since
+         * there is is no guaranteed order and CacheStats::regStats()
+         * needs to rely on these stats being initialised.
+         */
+        void regStatsFromParent();
+
+        const BaseCache &cache;
+
+        /** Number of hits per thread for each type of command.
+            @sa Packet::Command */
+        Stats::Vector hits;
+        /** Number of misses per thread for each type of command.
+            @sa Packet::Command */
+        Stats::Vector misses;
+        /**
+         * Total number of cycles per thread/command spent waiting for a miss.
+         * Used to calculate the average miss latency.
+         */
+        Stats::Vector missLatency;
+        /** The number of accesses per command and thread. */
+        Stats::Formula accesses;
+        /** The miss rate per command and thread. */
+        Stats::Formula missRate;
+        /** The average miss latency per command and thread. */
+        Stats::Formula avgMissLatency;
+        /** Number of misses that hit in the MSHRs per command and thread. */
+        Stats::Vector mshr_hits;
+        /** Number of misses that miss in the MSHRs, per command and thread. */
+        Stats::Vector mshr_misses;
+        /** Number of misses that miss in the MSHRs, per command and thread. */
+        Stats::Vector mshr_uncacheable;
+        /** Total cycle latency of each MSHR miss, per command and thread. */
+        Stats::Vector mshr_miss_latency;
+        /** Total cycle latency of each MSHR miss, per command and thread. */
+        Stats::Vector mshr_uncacheable_lat;
+        /** The miss rate in the MSHRs pre command and thread. */
+        Stats::Formula mshrMissRate;
+        /** The average latency of an MSHR miss, per command and thread. */
+        Stats::Formula avgMshrMissLatency;
+        /** The average latency of an MSHR miss, per command and thread. */
+        Stats::Formula avgMshrUncacheableLatency;
+    };
+
+    struct CacheStats : public Stats::Group
+    {
+        CacheStats(BaseCache &c);
+
+        void regStats() override;
+
+        CacheCmdStats &cmdStats(const PacketPtr p) {
+            return *cmd[p->cmdToIndex()];
+        }
+
+        const BaseCache &cache;
+
+        /** Number of hits for demand accesses. */
+        Stats::Formula demandHits;
+        /** Number of hit for all accesses. */
+        Stats::Formula overallHits;
+
+        /** Number of misses for demand accesses. */
+        Stats::Formula demandMisses;
+        /** Number of misses for all accesses. */
+        Stats::Formula overallMisses;
+
+        /** Total number of cycles spent waiting for demand misses. */
+        Stats::Formula demandMissLatency;
+        /** Total number of cycles spent waiting for all misses. */
+        Stats::Formula overallMissLatency;
+
+        /** The number of demand accesses. */
+        Stats::Formula demandAccesses;
+        /** The number of overall accesses. */
+        Stats::Formula overallAccesses;
+
+        /** The miss rate of all demand accesses. */
+        Stats::Formula demandMissRate;
+        /** The miss rate for all accesses. */
+        Stats::Formula overallMissRate;
+
+        /** The average miss latency for demand misses. */
+        Stats::Formula demandAvgMissLatency;
+        /** The average miss latency for all misses. */
+        Stats::Formula overallAvgMissLatency;
+
+        /** The total number of cycles blocked for each blocked cause. */
+        Stats::Vector blocked_cycles;
+        /** The number of times this cache blocked for each blocked cause. */
+        Stats::Vector blocked_causes;
+
+        /** The average number of cycles blocked for each blocked cause. */
+        Stats::Formula avg_blocked;
+
+        /** The number of times a HW-prefetched block is evicted w/o
+         * reference. */
+        Stats::Scalar unusedPrefetches;
+
+        /** Number of blocks written back per thread. */
+        Stats::Vector writebacks;
+
+        /** Demand misses that hit in the MSHRs. */
+        Stats::Formula demandMshrHits;
+        /** Total number of misses that hit in the MSHRs. */
+        Stats::Formula overallMshrHits;
+
+        /** Demand misses that miss in the MSHRs. */
+        Stats::Formula demandMshrMisses;
+        /** Total number of misses that miss in the MSHRs. */
+        Stats::Formula overallMshrMisses;
+
+        /** Total number of misses that miss in the MSHRs. */
+        Stats::Formula overallMshrUncacheable;
+
+        /** Total cycle latency of demand MSHR misses. */
+        Stats::Formula demandMshrMissLatency;
+        /** Total cycle latency of overall MSHR misses. */
+        Stats::Formula overallMshrMissLatency;
+
+        /** Total cycle latency of overall MSHR misses. */
+        Stats::Formula overallMshrUncacheableLatency;
+
+        /** The demand miss rate in the MSHRs. */
+        Stats::Formula demandMshrMissRate;
+        /** The overall miss rate in the MSHRs. */
+        Stats::Formula overallMshrMissRate;
+
+        /** The average latency of a demand MSHR miss. */
+        Stats::Formula demandAvgMshrMissLatency;
+        /** The average overall latency of an MSHR miss. */
+        Stats::Formula overallAvgMshrMissLatency;
+
+        /** The average overall latency of an MSHR miss. */
+        Stats::Formula overallAvgMshrUncacheableLatency;
+
+        /** Number of replacements of valid blocks. */
+        Stats::Scalar replacements;
+
+        /** Number of data expansions. */
+        Stats::Scalar dataExpansions;
+
+        /** Per-command statistics */
+        std::vector<std::unique_ptr<CacheCmdStats>> cmd;
+    } stats;
  
      /** Registers probes. */
      void regProbePoints() override;
@@ -1141,7 +1153,7 @@ class BaseCache : public ClockedObject
      {
          uint8_t flag = 1 << cause;
          if (blocked == 0) {
-            blocked_causes[cause]++;
+            stats.blocked_causes[cause]++;
              blockedCycle = curCycle();
              cpuSidePort.setBlocked();
          }
@@ -1162,7 +1174,7 @@ class BaseCache : public ClockedObject
          blocked &= ~flag;
          DPRINTF(Cache,"Unblocking for cause %d, mask=%d\n", cause, blocked);
          if (blocked == 0) {
-            blocked_cycles[cause] += curCycle() - blockedCycle;
+            stats.blocked_cycles[cause] += curCycle() - blockedCycle;
              cpuSidePort.clearBlocked();
          }
      }
@@ -1200,7 +1212,7 @@ class BaseCache : public ClockedObject
      void incMissCount(PacketPtr pkt)
      {
          assert(pkt->req->masterId() < system->maxMasters());
-        misses[pkt->cmdToIndex()][pkt->req->masterId()]++;
+        stats.cmdStats(pkt).misses[pkt->req->masterId()]++;
          pkt->req->incAccessDepth();
          if (missCount) {
              --missCount;
@@ -1211,8 +1223,7 @@ class BaseCache : public ClockedObject
      void incHitCount(PacketPtr pkt)
      {
          assert(pkt->req->masterId() < system->maxMasters());
-        hits[pkt->cmdToIndex()][pkt->req->masterId()]++;
-
+        stats.cmdStats(pkt).hits[pkt->req->masterId()]++;
      }
  
      /**