// The compressor is called to compress the updated data, so that its
// metadata can be updated.
- std::size_t compression_size = 0;
Cycles compression_lat = Cycles(0);
Cycles decompression_lat = Cycles(0);
- compressor->compress(data, compression_lat, decompression_lat,
- compression_size);
+ const auto comp_data =
+ compressor->compress(data, compression_lat, decompression_lat);
+ std::size_t compression_size = comp_data->getSizeBits();
// If block's compression factor increased, it may not be co-allocatable
// anymore. If so, some blocks might need to be evicted to make room for
// calculate the amount of extra cycles needed to read or write compressed
// blocks.
if (compressor && pkt->hasData()) {
- compressor->compress(pkt->getConstPtr<uint64_t>(), compression_lat,
- decompression_lat, blk_size_bits);
+ const auto comp_data = compressor->compress(
+ pkt->getConstPtr<uint64_t>(), compression_lat, decompression_lat);
+ blk_size_bits = comp_data->getSizeBits();
}
// Find replacement victim
cxx_header = "mem/cache/compressors/base.hh"
block_size = Param.Int(Parent.cache_line_size, "Block size in bytes")
+ chunk_size_bits = Param.Unsigned(32,
+ "Size of a parsing data chunk (in bits)")
size_threshold = Param.Unsigned(Parent.cache_line_size, "Minimum size, "
"in bytes, in which a block must be compressed to. Otherwise it is "
"stored in its uncompressed state")
cxx_class = 'Compressor::Base64Delta8'
cxx_header = "mem/cache/compressors/base_delta.hh"
+ chunk_size_bits = 64
+
class Base64Delta16(BaseDictionaryCompressor):
    type = 'Base64Delta16'
    cxx_class = 'Compressor::Base64Delta16'
    cxx_header = "mem/cache/compressors/base_delta.hh"
+    # Base-delta compressor with 64-bit bases: parse the line as 64-bit chunks
+    chunk_size_bits = 64
+
class Base64Delta32(BaseDictionaryCompressor):
    type = 'Base64Delta32'
    cxx_class = 'Compressor::Base64Delta32'
    cxx_header = "mem/cache/compressors/base_delta.hh"
+    # Base-delta compressor with 64-bit bases: parse the line as 64-bit chunks
+    chunk_size_bits = 64
+
class Base32Delta8(BaseDictionaryCompressor):
    type = 'Base32Delta8'
    cxx_class = 'Compressor::Base32Delta8'
    cxx_header = "mem/cache/compressors/base_delta.hh"
+    # Base-delta compressor with 32-bit bases: parse the line as 32-bit chunks
+    chunk_size_bits = 32
+
class Base32Delta16(BaseDictionaryCompressor):
    type = 'Base32Delta16'
    cxx_class = 'Compressor::Base32Delta16'
    cxx_header = "mem/cache/compressors/base_delta.hh"
+    # Base-delta compressor with 32-bit bases: parse the line as 32-bit chunks
+    chunk_size_bits = 32
+
class Base16Delta8(BaseDictionaryCompressor):
    type = 'Base16Delta8'
    cxx_class = 'Compressor::Base16Delta8'
    cxx_header = "mem/cache/compressors/base_delta.hh"
+    # Base-delta compressor with 16-bit bases: parse the line as 16-bit chunks
+    chunk_size_bits = 16
+
class CPack(BaseDictionaryCompressor):
type = 'CPack'
cxx_class = 'Compressor::CPack'
cxx_class = 'Compressor::Perfect'
cxx_header = "mem/cache/compressors/perfect.hh"
+ chunk_size_bits = 64
max_compression_ratio = Param.Int(2,
"Maximum compression ratio allowed")
compression_latency = Param.Cycles(1,
cxx_class = 'Compressor::RepeatedQwords'
cxx_header = "mem/cache/compressors/repeated_qwords.hh"
+ chunk_size_bits = 64
+
class ZeroCompressor(BaseDictionaryCompressor):
    type = 'ZeroCompressor'
    cxx_class = 'Compressor::Zero'
    cxx_header = "mem/cache/compressors/zero.hh"
+    # Parse the line as 64-bit chunks (qwords)
+    chunk_size_bits = 64
+
class BDI(MultiCompressor):
compressors = [ZeroCompressor(), RepeatedQwordsCompressor(),
Base64Delta8(), Base64Delta16(), Base64Delta32(), Base32Delta8(),
#include "mem/cache/compressors/base.hh"
#include <algorithm>
+#include <climits>
#include <cmath>
#include <cstdint>
+#include <cstring>
#include <string>
}
Base::Base(const Params *p)
-    : SimObject(p), blkSize(p->block_size), sizeThreshold(p->size_threshold),
+    : SimObject(p), blkSize(p->block_size), chunkSizeBits(p->chunk_size_bits),
+      sizeThreshold(p->size_threshold),
      stats(*this)
{
+    // Guard against a zero chunk size first: "64 % 0" below would be
+    // undefined behavior (division by zero) before fatal_if could fire.
+    fatal_if(chunkSizeBits == 0, "Chunk granularity must be non-zero.");
+    fatal_if(64 % chunkSizeBits,
+        "64 must be a multiple of the chunk granularity.");
+
    fatal_if(blkSize < sizeThreshold, "Compressed data must fit in a block");
}
+std::vector<Base::Chunk>
+Base::toChunks(const uint64_t* data) const
+{
+    // Number of chunks in a 64-bit value. The constructor checks that
+    // chunkSizeBits divides 64, so the divisions below are exact and
+    // plain integer division suffices (no floating point / std::floor).
+    const unsigned num_chunks_per_64 =
+        (sizeof(uint64_t) * CHAR_BIT) / chunkSizeBits;
+
+    // Turn a 64-bit array into a chunkSizeBits-array
+    std::vector<Chunk> chunks((blkSize * CHAR_BIT) / chunkSizeBits, 0);
+    for (std::size_t i = 0; i < chunks.size(); i++) {
+        // Source 64-bit word, and the chunk's position within that word
+        const std::size_t index_64 = i / num_chunks_per_64;
+        const unsigned start = i % num_chunks_per_64;
+        chunks[i] = bits(data[index_64],
+            (start + 1) * chunkSizeBits - 1, start * chunkSizeBits);
+    }
+
+    return chunks;
+}
+
void
-Base::compress(const uint64_t* data, Cycles& comp_lat,
-    Cycles& decomp_lat, std::size_t& comp_size_bits)
+Base::fromChunks(const std::vector<Chunk>& chunks, uint64_t* data) const
+{
+    // Number of chunks in a 64-bit value. The constructor checks that
+    // chunkSizeBits divides 64, so the divisions below are exact and
+    // plain integer division suffices (no floating point / std::floor).
+    const unsigned num_chunks_per_64 =
+        (sizeof(uint64_t) * CHAR_BIT) / chunkSizeBits;
+
+    // Turn a chunkSizeBits-array into a 64-bit array
+    std::memset(data, 0, blkSize);
+    for (std::size_t i = 0; i < chunks.size(); i++) {
+        // Destination 64-bit word, and the chunk's position within it
+        const std::size_t index_64 = i / num_chunks_per_64;
+        const unsigned start = i % num_chunks_per_64;
+        replaceBits(data[index_64], (start + 1) * chunkSizeBits - 1,
+            start * chunkSizeBits, chunks[i]);
+    }
+}
+
+std::unique_ptr<Base::CompressionData>
+Base::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
{
    // Apply compression
    std::unique_ptr<CompressionData> comp_data =
-        compress(data, comp_lat, decomp_lat);
+        compress(toChunks(data), comp_lat, decomp_lat);
    // If we are in debug mode apply decompression just after the compression.
    // If the results do not match, we've got an error
    // Get compression size. If compressed size is greater than the size
    // threshold, the compression is seen as unsuccessful
-    comp_size_bits = comp_data->getSizeBits();
-    if (comp_size_bits >= sizeThreshold * 8) {
-        comp_size_bits = blkSize * 8;
+    std::size_t comp_size_bits = comp_data->getSizeBits();
+    if (comp_size_bits > sizeThreshold * CHAR_BIT) {
+        comp_size_bits = blkSize * CHAR_BIT;
+        // Keep the stored size consistent with the value reported below
+        comp_data->setSizeBits(comp_size_bits);
    }
    // Update stats
    DPRINTF(CacheComp, "Compressed cache line from %d to %d bits. " \
           "Compression latency: %llu, decompression latency: %llu\n",
           blkSize*8, comp_size_bits, comp_lat, decomp_lat);
+
+    // A plain return implicitly moves the local unique_ptr; wrapping it in
+    // std::move is redundant and flagged by -Wredundant-move.
+    return comp_data;
+}
Cycles
/**
* Base cache compressor interface. Every cache compressor must implement a
* compression and a decompression method.
+ *
+ * Compressors usually cannot parse all of the input data at once.
+ * Therefore, they typically divide the input into multiple *chunks* and
+ * parse them one per cycle.
*/
class Base : public SimObject
{
+ public:
+ /**
+ * Forward declaration of compression data. Every new compressor must
+ * create a new compression data based on it.
+ */
+ class CompressionData;
+
protected:
/**
- * This compressor must be able to access the protected functions of
- * its sub-compressors.
+ * A chunk is a basic lexical unit. The data being compressed is received
+ * by the compressor as a raw pointer. In order to parse this data, the
+ * compressor must divide it into smaller units. Typically, state-of-the-
+ * art compressors interpret cache lines as sequential 32-bit chunks,
+ * but any size is valid.
+ * @sa chunkSizeBits
*/
- friend class Multi;
+ typedef uint64_t Chunk;
/**
- * Forward declaration of compression data. Every new compressor must
- * create a new compression data based on it.
+ * This compressor must be able to access the protected functions of
+ * its sub-compressors.
*/
- class CompressionData;
+ friend class Multi;
/**
* Uncompressed cache line size (in bytes).
*/
const std::size_t blkSize;
+ /** Chunk size, in number of bits. */
+ const unsigned chunkSizeBits;
+
/**
* Size in bytes at which a compression is classified as bad and therefore
* the compressed block is restored to its uncompressed format.
Stats::Scalar decompressions;
} stats;
+ /**
+ * This function splits the raw data into chunks, so that it can be
+ * parsed by the compressor.
+ *
+ * @param data The raw pointer to the data being compressed.
+ * @return The raw data divided into a vector of sequential chunks.
+ */
+ std::vector<Chunk> toChunks(const uint64_t* data) const;
+
+ /**
+ * This function re-joins the chunks to recreate the original data.
+ *
+ * @param chunks The raw data divided into a vector of sequential chunks.
+ * @param data The raw pointer to the data.
+ */
+ void fromChunks(const std::vector<Chunk>& chunks, uint64_t* data) const;
+
/**
* Apply the compression process to the cache line.
* Returns the number of cycles used by the compressor, however it is
* The decompression latency is also returned, in order to avoid
* increasing simulation time and memory consumption.
*
- * @param cache_line The cache line to be compressed.
+ * @param chunks The cache line to be compressed, divided into chunks.
* @param comp_lat Compression latency in number of cycles.
* @param decomp_lat Decompression latency in number of cycles.
* @return Cache line after compression.
*/
virtual std::unique_ptr<CompressionData> compress(
- const uint64_t* cache_line, Cycles& comp_lat, Cycles& decomp_lat) = 0;
+ const std::vector<Chunk>& chunks, Cycles& comp_lat,
+ Cycles& decomp_lat) = 0;
/**
* Apply the decompression process to the compressed data.
* @param data The cache line to be compressed.
* @param comp_lat Compression latency in number of cycles.
* @param decomp_lat Decompression latency in number of cycles.
- * @param comp_size_bits Compressed data size (in bits).
+ * @return Cache line after compression.
*/
- void compress(const uint64_t* data, Cycles& comp_lat,
- Cycles& decomp_lat, std::size_t& comp_size_bits);
+ std::unique_ptr<CompressionData>
+ compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat);
/**
* Get the decompression latency if the block is compressed. Latency is 0
void addToDictionary(DictionaryEntry data) override;
- std::unique_ptr<Base::CompressionData>
- compress(const uint64_t* data, Cycles& comp_lat,
- Cycles& decomp_lat) override;
+ std::unique_ptr<Base::CompressionData> compress(
+ const std::vector<Base::Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat) override;
public:
typedef BaseDictionaryCompressorParams Params;
template <class BaseType, std::size_t DeltaSizeBits>
std::unique_ptr<Base::CompressionData>
-BaseDelta<BaseType, DeltaSizeBits>::compress(const uint64_t* data,
- Cycles& comp_lat, Cycles& decomp_lat)
+BaseDelta<BaseType, DeltaSizeBits>::compress(
+ const std::vector<Base::Chunk>& chunks, Cycles& comp_lat,
+ Cycles& decomp_lat)
{
std::unique_ptr<Base::CompressionData> comp_data =
- DictionaryCompressor<BaseType>::compress(data);
+ DictionaryCompressor<BaseType>::compress(chunks);
// If there are more bases than the maximum, the compressor failed.
// Otherwise, we have to take into account all bases that have not
}
std::unique_ptr<Base::CompressionData>
-CPack::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
+CPack::compress(const std::vector<Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat)
{
std::unique_ptr<Base::CompressionData> comp_data =
- DictionaryCompressor<uint32_t>::compress(data);
+ DictionaryCompressor<uint32_t>::compress(chunks);
// Set compression latency (Accounts for pattern matching, length
// generation, packaging and shifting)
void addToDictionary(DictionaryEntry data) override;
- /**
- * Apply compression.
- *
- * @param data The cache line to be compressed.
- * @param comp_lat Compression latency in number of cycles.
- * @param decomp_lat Decompression latency in number of cycles.
- * @return Cache line after compression.
- */
std::unique_ptr<Base::CompressionData> compress(
- const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override;
+ const std::vector<Base::Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat) override;
public:
/** Convenience typedef. */
/**
* Apply compression.
*
- * @param data The cache line to be compressed.
+ * @param chunks The cache line to be compressed.
* @return Cache line after compression.
*/
- std::unique_ptr<Base::CompressionData> compress(const uint64_t* data);
+ std::unique_ptr<Base::CompressionData> compress(
+ const std::vector<Chunk>& chunks);
using BaseDictionaryCompressor::compress;
template <class T>
std::unique_ptr<Base::CompressionData>
-DictionaryCompressor<T>::compress(const uint64_t* data)
+DictionaryCompressor<T>::compress(const std::vector<Chunk>& chunks)
{
std::unique_ptr<Base::CompressionData> comp_data =
instantiateDictionaryCompData();
// Compress every value sequentially
CompData* const comp_data_ptr = static_cast<CompData*>(comp_data.get());
- const std::vector<T> values((T*)data, (T*)data + blkSize / sizeof(T));
- for (const auto& value : values) {
+ for (const auto& value : chunks) {
std::unique_ptr<Pattern> pattern = compressValue(value);
DPRINTF(CacheComp, "Compressed %016x to %s\n", value,
pattern->print());
}
std::unique_ptr<Base::CompressionData>
-FPCD::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
+FPCD::compress(const std::vector<Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat)
{
std::unique_ptr<Base::CompressionData> comp_data =
- DictionaryCompressor<uint32_t>::compress(data);
+ DictionaryCompressor<uint32_t>::compress(chunks);
// Set compression latency (Accounts for zero checks, ones check, match
// previous check, match penultimate check, repeated values check, pattern
void addToDictionary(DictionaryEntry data) override;
std::unique_ptr<Base::CompressionData> compress(
- const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override;
+ const std::vector<Base::Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat) override;
public:
typedef FPCDParams Params;
}
std::unique_ptr<Base::CompressionData>
-Multi::compress(const uint64_t* cache_line, Cycles& comp_lat,
+Multi::compress(const std::vector<Chunk>& chunks, Cycles& comp_lat,
Cycles& decomp_lat)
{
struct Results
}
};
+    // Each sub-compressor can have its own chunk size; therefore, revert
+    // the chunks to raw data, so that they handle the conversion internally.
+    // A std::vector is used instead of a variable-length array, which is a
+    // non-standard compiler extension in C++. No explicit memset is needed:
+    // the vector value-initializes its elements, and fromChunks zeroes the
+    // buffer again before filling it.
+    std::vector<uint64_t> data(blkSize / sizeof(uint64_t));
+    fromChunks(chunks, data.data());
+
    // Find the ranking of the compressor outputs
    std::priority_queue<std::shared_ptr<Results>,
        std::vector<std::shared_ptr<Results>>, ResultsComparator> results;
    for (unsigned i = 0; i < compressors.size(); i++) {
        Cycles temp_decomp_lat;
        auto temp_comp_data =
-            compressors[i]->compress(cache_line, comp_lat, temp_decomp_lat);
+            compressors[i]->compress(data.data(), comp_lat, temp_decomp_lat);
results.push(std::make_shared<Results>(i, std::move(temp_comp_data),
temp_decomp_lat, blkSize));
max_comp_lat = std::max(max_comp_lat, comp_lat);
~Multi();
std::unique_ptr<Base::CompressionData> compress(
- const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override;
+ const std::vector<Base::Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat) override;
void decompress(const CompressionData* comp_data, uint64_t* data) override;
};
namespace Compressor {
-Perfect::CompData::CompData(const uint64_t* data,
- std::size_t num_entries)
- : CompressionData(), entries(data, data + num_entries)
-{
-}
-
Perfect::Perfect(const Params *p)
-    : Base(p),
-      compressedSize(8 * blkSize / p->max_compression_ratio),
-      compressionLatency(p->compression_latency),
-      decompressionLatency(p->decompression_latency)
+    // Fixed compression: size in bits (8 * bytes / ratio) and both
+    // latencies are taken straight from the params.
+    : Base(p), compressedSize(8 * blkSize / p->max_compression_ratio),
+      compressionLatency(p->compression_latency),
+      decompressionLatency(p->decompression_latency)
{
}
std::unique_ptr<Base::CompressionData>
-Perfect::compress(const uint64_t* cache_line, Cycles& comp_lat,
- Cycles& decomp_lat)
+Perfect::compress(const std::vector<Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat)
{
// Compress every word sequentially
- std::unique_ptr<Base::CompressionData> comp_data(
- new CompData(cache_line, blkSize/8));
+ std::unique_ptr<Base::CompressionData> comp_data(new CompData(chunks));
// Set relevant metadata
comp_data->setSizeBits(compressedSize);
uint64_t* data)
{
// Decompress every entry sequentially
- const std::vector<uint64_t>& entries =
- static_cast<const CompData*>(comp_data)->entries;
- assert(entries.size() == (blkSize/8));
- std::copy(entries.begin(), entries.end(), data);
+ fromChunks(static_cast<const CompData*>(comp_data)->chunks, data);
}
} // namespace Compressor
/** Number of cycles needed to perform decompression. */
const Cycles decompressionLatency;
- std::unique_ptr<CompressionData> compress(const uint64_t* cache_line,
- Cycles& comp_lat, Cycles& decomp_lat) override;
+ std::unique_ptr<CompressionData> compress(
+ const std::vector<Chunk>& chunks, Cycles& comp_lat,
+ Cycles& decomp_lat) override;
void decompress(const CompressionData* comp_data, uint64_t* data) override;
{
public:
/** The original data is simply copied over to this vector. */
- std::vector<uint64_t> entries;
+ std::vector<Chunk> chunks;
/**
* Default constructor that creates a copy of the original data.
*
- * @param data The data to be compressed.
- * @param num_entries The number of qwords in the data.
+ * @param chunks The data to be compressed.
*/
- CompData(const uint64_t* data, std::size_t num_entries);
+ CompData(const std::vector<Chunk>& chunks)
+ : CompressionData(), chunks(chunks)
+ {
+ }
~CompData() = default;
};
}
std::unique_ptr<Base::CompressionData>
-RepeatedQwords::compress(const uint64_t* data, Cycles& comp_lat,
- Cycles& decomp_lat)
+RepeatedQwords::compress(const std::vector<Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat)
{
std::unique_ptr<Base::CompressionData> comp_data =
- DictionaryCompressor::compress(data);
+ DictionaryCompressor::compress(chunks);
// Since there is a single value repeated over and over, there should be
// a single dictionary entry. If there are more, the compressor failed
void addToDictionary(DictionaryEntry data) override;
std::unique_ptr<Base::CompressionData> compress(
- const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override;
+ const std::vector<Base::Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat) override;
public:
typedef RepeatedQwordsCompressorParams Params;
}
std::unique_ptr<Base::CompressionData>
-Zero::compress(const uint64_t* data, Cycles& comp_lat,
+Zero::compress(const std::vector<Chunk>& chunks, Cycles& comp_lat,
Cycles& decomp_lat)
{
std::unique_ptr<Base::CompressionData> comp_data =
- DictionaryCompressor::compress(data);
+ DictionaryCompressor::compress(chunks);
// If there is any non-zero entry, the compressor failed
if (numEntries > 0) {
void addToDictionary(DictionaryEntry data) override;
std::unique_ptr<Base::CompressionData> compress(
- const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override;
+ const std::vector<Base::Chunk>& chunks,
+ Cycles& comp_lat, Cycles& decomp_lat) override;
public:
typedef ZeroCompressorParams Params;