From: Daniel R. Carvalho Date: Tue, 19 Jun 2018 15:08:35 +0000 (+0200) Subject: mem-cache: Add compression and decompression calls X-Git-Tag: v19.0.0.0~874 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a39af1f0ac6d324b4c206d4db18c39ea557bb931;p=gem5.git mem-cache: Add compression and decompression calls Add a compressor to the base cache class and compress within block allocation and decompress on writebacks. This change does not implement data expansion (fat writes) yet, nor it adds the compression latency to the block write time. Change-Id: Ie36db65f7487c9b05ec4aedebc2c7651b4cb4821 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/11410 Tested-by: kokoro Reviewed-by: Nikos Nikoleris Maintainer: Nikos Nikoleris --- diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py index b2f478472..7a28136b5 100644 --- a/src/mem/cache/Cache.py +++ b/src/mem/cache/Cache.py @@ -44,11 +44,11 @@ from m5.proxy import * from m5.SimObject import SimObject from m5.objects.ClockedObject import ClockedObject +from m5.objects.Compressors import BaseCacheCompressor from m5.objects.Prefetcher import BasePrefetcher from m5.objects.ReplacementPolicies import * from m5.objects.Tags import * - # Enum for cache clusivity, currently mostly inclusive or mostly # exclusive. 
class Clusivity(Enum): vals = ['mostly_incl', 'mostly_excl'] @@ -105,6 +105,8 @@ class BaseCache(ClockedObject): replacement_policy = Param.BaseReplacementPolicy(LRURP(), "Replacement policy") + compressor = Param.BaseCacheCompressor(NULL, "Cache compressor.") + sequential_access = Param.Bool(False, "Whether to access tags and data sequentially") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 36968a18d..e2149dbb4 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -54,6 +54,7 @@ #include "debug/CachePort.hh" #include "debug/CacheRepl.hh" #include "debug/CacheVerbose.hh" +#include "mem/cache/compressors/base.hh" #include "mem/cache/mshr.hh" #include "mem/cache/prefetch/base.hh" #include "mem/cache/queue_entry.hh" @@ -83,6 +84,7 @@ BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size) mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below tags(p->tags), + compressor(p->compressor), prefetcher(p->prefetcher), writeAllocator(p->write_allocator), writebackClean(p->writeback_clean), @@ -1034,7 +1036,16 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, } blk->status |= BlkReadable; + } else { + if (compressor) { + // This is an overwrite to an existing block, therefore we need + // to check for data expansion (i.e., block was compressed with + // a smaller size, and now it doesn't fit the entry anymore). + // If that is the case we might need to evict blocks. 
+ // @todo Update compression data + } } + // only mark the block dirty if we got a writeback command, // and leave it as is for a clean writeback if (pkt->cmd == MemCmd::WritebackDirty) { @@ -1114,6 +1125,10 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, blk->status |= BlkReadable; } + } else { + if (compressor) { + // @todo Update compression data + } } // at this point either this is a writeback or a write-through @@ -1151,6 +1166,12 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat, // Calculate access latency based on the need to access the data array if (pkt->isRead() || pkt->isWrite()) { lat = calculateAccessLatency(blk, pkt->headerDelay, tag_latency); + + // When a block is compressed, it must first be decompressed + // before being read. This adds to the access latency. + if (compressor && pkt->isRead()) { + lat += compressor->getDecompressionLatency(blk); + } } else { lat = calculateTagOnlyLatency(pkt->headerDelay, tag_latency); } @@ -1319,8 +1340,22 @@ BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks) // Get secure bit const bool is_secure = pkt->isSecure(); - // @todo Compress and get compression related data + // Block size and compression related access latency. Only relevant if + // using a compressor, otherwise there is no extra delay, and the block + // is fully sized std::size_t blk_size_bits = blkSize*8; + Cycles compression_lat = Cycles(0); + Cycles decompression_lat = Cycles(0); + + // If a compressor is being used, it is called to compress data before + // insertion. Although in Gem5 the data is stored uncompressed, even if a + // compressor is used, the compression/decompression methods are called to + // calculate the amount of extra cycles needed to read or write compressed + // blocks. 
+ if (compressor) { + compressor->compress(pkt->getConstPtr<uint64_t>(), compression_lat, + decompression_lat, blk_size_bits); + } // Find replacement victim std::vector<CacheBlk*> evict_blks; @@ -1377,6 +1412,13 @@ BaseCache::allocateBlock(const PacketPtr pkt, PacketList &writebacks) replacements++; } + // If using a compressor, set compression data. This must be done before + // block insertion, as compressed tags use this information. + if (compressor) { + compressor->setSizeBits(victim, blk_size_bits); + compressor->setDecompressionLatency(victim, decompression_lat); + } + // Insert new block at victimized entry tags->insertBlock(pkt, victim); @@ -1443,6 +1485,12 @@ BaseCache::writebackBlk(CacheBlk *blk) pkt->allocate(); pkt->setDataFromBlock(blk->data, blkSize); + // When a block is compressed, it must first be decompressed before being + // sent for writeback. + if (compressor) { + pkt->payloadDelay = compressor->getDecompressionLatency(blk); + } + return pkt; } @@ -1482,6 +1530,12 @@ BaseCache::writecleanBlk(CacheBlk *blk, Request::Flags dest, PacketId id) pkt->allocate(); pkt->setDataFromBlock(blk->data, blkSize); + // When a block is compressed, it must first be decompressed before being + // sent for writeback. + if (compressor) { + pkt->payloadDelay = compressor->getDecompressionLatency(blk); + } + return pkt; } diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index b995a6e47..02b9e2d2e 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -64,6 +64,7 @@ #include "debug/CachePort.hh" #include "enums/Clusivity.hh" #include "mem/cache/cache_blk.hh" +#include "mem/cache/compressors/base.hh" #include "mem/cache/mshr_queue.hh" #include "mem/cache/tags/base.hh" #include "mem/cache/write_queue.hh" @@ -324,6 +325,9 @@ class BaseCache : public ClockedObject /** Tag and data Storage */ BaseTags *tags; + /** Compression method being used. 
*/ + BaseCacheCompressor* compressor; + /** Prefetcher */ BasePrefetcher *prefetcher; @@ -1070,6 +1074,15 @@ class BaseCache : public ClockedObject Addr blk_addr = pkt->getBlockAddr(blkSize); + // If using compression, on evictions the block is decompressed and + // the operation's latency is added to the payload delay. Consume + // that payload delay here, meaning that the data is always stored + // uncompressed in the writebuffer + if (compressor) { + time += pkt->payloadDelay; + pkt->payloadDelay = 0; + } + WriteQueueEntry *wq_entry = writeBuffer.findMatch(blk_addr, pkt->isSecure()); if (wq_entry && !wq_entry->inService) { diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc index 4643e1d35..a601a7be3 100644 --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -1129,6 +1129,12 @@ Cache::handleSnoop(PacketPtr pkt, CacheBlk *blk, bool is_timing, if (pkt->hasData()) pkt->setDataFromBlock(blk->data, blkSize); } + + // When a block is compressed, it must first be decompressed before + // being read, and this increases the snoop delay. 
+ if (compressor && pkt->isRead()) { + snoop_delay += compressor->getDecompressionLatency(blk); + } } if (!respond && is_deferred) { diff --git a/src/mem/cache/compressors/base.cc b/src/mem/cache/compressors/base.cc index 1ba2677ea..40244e249 100644 --- a/src/mem/cache/compressors/base.cc +++ b/src/mem/cache/compressors/base.cc @@ -111,13 +111,16 @@ BaseCacheCompressor::compress(const uint64_t* data, Cycles& comp_lat, } Cycles -BaseCacheCompressor::getDecompressionLatency(const CacheBlk* blk) +BaseCacheCompressor::getDecompressionLatency(const CacheBlk* blk) const { const CompressionBlk* comp_blk = static_cast<const CompressionBlk*>(blk); // If block is compressed, return its decompression latency if (comp_blk && comp_blk->isCompressed()){ - return comp_blk->getDecompressionLatency(); + const Cycles decomp_lat = comp_blk->getDecompressionLatency(); + DPRINTF(CacheComp, "Decompressing block: %s (%d cycles)\n", + comp_blk->print(), decomp_lat); + return decomp_lat; } // Block is not compressed, so there is no decompression latency diff --git a/src/mem/cache/compressors/base.hh b/src/mem/cache/compressors/base.hh index a19a0727a..f457ecdc4 100644 --- a/src/mem/cache/compressors/base.hh +++ b/src/mem/cache/compressors/base.hh @@ -132,7 +132,7 @@ class BaseCacheCompressor : public SimObject { * * @param blk The compressed block. */ - static Cycles getDecompressionLatency(const CacheBlk* blk); + Cycles getDecompressionLatency(const CacheBlk* blk) const; /** * Set the decompression latency of compressed block. 
diff --git a/src/mem/cache/tags/compressed_tags.cc b/src/mem/cache/tags/compressed_tags.cc index cc799df24..46043be68 100644 --- a/src/mem/cache/tags/compressed_tags.cc +++ b/src/mem/cache/tags/compressed_tags.cc @@ -37,6 +37,7 @@ #include "mem/cache/replacement_policies/base.hh" #include "mem/cache/tags/indexing_policies/base.hh" +#include "mem/packet.hh" #include "params/CompressedTags.hh" CompressedTags::CompressedTags(const Params *p) @@ -92,6 +93,17 @@ CompressedTags::tagsInit() } } +void +CompressedTags::insertBlock(const PacketPtr pkt, CacheBlk *blk) +{ + // Insert block + SectorTags::insertBlock(pkt, blk); + + // @todo We always store compressed blocks when possible + CompressionBlk* compression_blk = static_cast<CompressionBlk*>(blk); + compression_blk->setUncompressed(); +} + void CompressedTags::forEachBlk(std::function<void(CacheBlk &)> visitor) { diff --git a/src/mem/cache/tags/compressed_tags.hh b/src/mem/cache/tags/compressed_tags.hh index 0bf96b55e..303bc79d8 100644 --- a/src/mem/cache/tags/compressed_tags.hh +++ b/src/mem/cache/tags/compressed_tags.hh @@ -96,6 +96,14 @@ class CompressedTags : public SectorTags */ void tagsInit() override; + /** + * Insert the new block into the cache and update replacement data. + * + * @param pkt Packet holding the address to update + * @param blk The block to update. + */ + void insertBlock(const PacketPtr pkt, CacheBlk *blk) override; + /** * Visit each sub-block in the tags and apply a visitor. *