From: Daniel R. Carvalho Date: Thu, 5 Jul 2018 12:53:15 +0000 (+0200) Subject: mem-cache: Implement FPC cache compressor X-Git-Tag: develop-gem5-snapshot~565 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0d091f6e07a3b78d3f3cdc8989bd53dec3f6387c;p=gem5.git mem-cache: Implement FPC cache compressor Implementation of Frequent Pattern Compression, proposed by Alameldeen et al. in "Frequent Pattern Compression: A Significance-Based Compression Scheme for L2 Caches". Change-Id: I6dca8ca6b3043b561140bc681dbdbe9f7cef27d7 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36395 Reviewed-by: Nikos Nikoleris Reviewed-by: Jason Lowe-Power Maintainer: Nikos Nikoleris Tested-by: kokoro --- diff --git a/src/mem/cache/compressors/Compressors.py b/src/mem/cache/compressors/Compressors.py index a1f3706fd..b93a66a0d 100644 --- a/src/mem/cache/compressors/Compressors.py +++ b/src/mem/cache/compressors/Compressors.py @@ -148,6 +148,21 @@ class CPack(BaseDictionaryCompressor): decomp_chunks_per_cycle = 2 decomp_extra_latency = 1 +class FPC(BaseDictionaryCompressor): + type = 'FPC' + cxx_class = 'Compressor::FPC' + cxx_header = "mem/cache/compressors/fpc.hh" + + comp_chunks_per_cycle = 8 + comp_extra_latency = 1 + decomp_chunks_per_cycle = 4 + decomp_extra_latency = 1 + + # Dummy dictionary size, since FPC has no dictionary + dictionary_size = 1 + + zero_run_bits = Param.Int(3, "Number of bits of the zero run bit field") + class FPCD(BaseDictionaryCompressor): type = 'FPCD' cxx_class = 'Compressor::FPCD' diff --git a/src/mem/cache/compressors/SConscript b/src/mem/cache/compressors/SConscript index d73c2be8a..f696c045e 100644 --- a/src/mem/cache/compressors/SConscript +++ b/src/mem/cache/compressors/SConscript @@ -34,6 +34,7 @@ Source('base.cc') Source('base_dictionary_compressor.cc') Source('base_delta.cc') Source('cpack.cc') +Source('fpc.cc') Source('fpcd.cc') Source('multi.cc') Source('perfect.cc') diff --git 
a/src/mem/cache/compressors/dictionary_compressor.hh b/src/mem/cache/compressors/dictionary_compressor.hh index e5d67d7ed..fe70b5985 100644 --- a/src/mem/cache/compressors/dictionary_compressor.hh +++ b/src/mem/cache/compressors/dictionary_compressor.hh @@ -135,6 +135,8 @@ class DictionaryCompressor : public BaseDictionaryCompressor class RepeatedValuePattern; template class DeltaPattern; + template + class SignExtendedPattern; /** * Create a factory to determine if input matches a pattern. The if else @@ -346,7 +348,7 @@ class DictionaryCompressor::Pattern * * @return The size. */ - std::size_t + virtual std::size_t getSizeBits() const { return numUnmatchedBits + length; @@ -735,6 +737,55 @@ class DictionaryCompressor::DeltaPattern } }; +/** + * A pattern that checks whether the value is an N bits sign-extended value, + * that is, all the MSB starting from the Nth are equal to the (N-1)th bit. + * + * Therefore, if N = 8, and T has 16 bits, the values within the ranges + * [0x0000, 0x007F] and [0xFF80, 0xFFFF] would match this pattern. + * + * @tparam N The number of bits in the non-extended original value. It must + * fit in a dictionary entry. + */ +template +template +class DictionaryCompressor::SignExtendedPattern + : public DictionaryCompressor::Pattern +{ + private: + static_assert((N > 0) & (N <= (sizeof(T) * 8)), + "The original data's type size must be smaller than the dictionary's"); + + /** The non-extended original value. 
*/ + const T bits : N; + + public: + SignExtendedPattern(const int number, + const uint64_t code, + const uint64_t metadata_length, + const DictionaryEntry bytes, + const bool allocate = false) + : DictionaryCompressor::Pattern(number, code, metadata_length, N, + -1, allocate), + bits(fromDictionaryEntry(bytes) & mask(N)) + { + } + + static bool + isPattern(const DictionaryEntry& bytes, + const DictionaryEntry& dict_bytes, const int match_location) + { + const T data = DictionaryCompressor::fromDictionaryEntry(bytes); + return data == sext(data & mask(N)); + } + + DictionaryEntry + decompress(const DictionaryEntry dict_bytes) const override + { + return toDictionaryEntry(sext(bits)); + } +}; + } // namespace Compressor #endif //__MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__ diff --git a/src/mem/cache/compressors/fpc.cc b/src/mem/cache/compressors/fpc.cc new file mode 100644 index 000000000..8398a3212 --- /dev/null +++ b/src/mem/cache/compressors/fpc.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018-2020 Inria + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mem/cache/compressors/fpc.hh" + +#include "mem/cache/compressors/dictionary_compressor_impl.hh" +#include "params/FPC.hh" + +namespace Compressor { + +FPC::FPCCompData::FPCCompData(int zero_run_size_bits) + : CompData(), zeroRunSizeBits(zero_run_size_bits) +{ +} + +void +FPC::FPCCompData::addEntry(std::unique_ptr pattern) +{ + // If this is a zero match, check for zero runs + if (pattern->getPatternNumber() == ZERO_RUN) { + // If it is a new zero run, create it; otherwise, increase current + // run's length + if (!entries.size() || + (entries.back()->getPatternNumber() != ZERO_RUN)) { + static_cast(pattern.get())->setRealSize(zeroRunSizeBits); + } else { + // A zero run has a maximum length, given by the number of bits + // used to represent it. When this limit is reached, a new run + // must be created + const int run_length = + static_cast(entries.back().get())->getRunLength(); + if (run_length == mask(zeroRunSizeBits)) { + // The limit for this zero run has been reached, so a new + // run must be started, with a sized pattern + static_cast(pattern.get())->setRealSize( + zeroRunSizeBits); + } else { + // Increase the current run's length. 
+ // Since the first zero entry of the run contains the size, + // and all the following ones are created just to simplify + // decompression, this fake pattern will have a size of 0 bits + static_cast(pattern.get())->setRunLength( + run_length + 1); + } + } + } + + CompData::addEntry(std::move(pattern)); +} + +FPC::FPC(const Params &p) + : DictionaryCompressor(p), zeroRunSizeBits(p.zero_run_bits) +{ +} + +void +FPC::addToDictionary(const DictionaryEntry data) +{ + // There is no dictionary in FPC, so its size is zero, and no pattern + // causes an insertion in the dictionary. The only reason we do not + // assert it is that the UncompressedPattern implementation always + // inserts by default +} + +std::unique_ptr::CompData> +FPC::instantiateDictionaryCompData() const +{ + return std::unique_ptr::CompData>( + new FPCCompData(zeroRunSizeBits)); +} + +} // namespace Compressor + +Compressor::FPC* +FPCParams::create() const +{ + return new Compressor::FPC(*this); +} diff --git a/src/mem/cache/compressors/fpc.hh b/src/mem/cache/compressors/fpc.hh new file mode 100644 index 000000000..d13f3dd72 --- /dev/null +++ b/src/mem/cache/compressors/fpc.hh @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2018-2020 Inria + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** @file + * Definition of the Frequent Pattern Compression cache compressor, as + * described in "Frequent Pattern Compression: A Significance-Based + * Compression Scheme for L2 Caches". + */ + +#ifndef __MEM_CACHE_COMPRESSORS_FPC_HH__ +#define __MEM_CACHE_COMPRESSORS_FPC_HH__ + +#include +#include +#include +#include +#include +#include + +#include "base/bitfield.hh" +#include "base/types.hh" +#include "mem/cache/compressors/dictionary_compressor.hh" + +struct FPCParams; + +namespace Compressor { + +class FPC : public DictionaryCompressor +{ + private: + using DictionaryEntry = DictionaryCompressor::DictionaryEntry; + + /** + * Compression data for FPC. It contains a list of the patterns + * encountered while parsing the cache line. + */ + class FPCCompData; + + // Declaration of all possible patterns + class ZeroRun; + class SignExtended4Bits; + class SignExtended1Byte; + class SignExtendedHalfword; + class ZeroPaddedHalfword; + class SignExtendedTwoHalfwords; + class RepBytes; + class Uncompressed; + + /** + * The possible patterns. If a new pattern is added, it must be done + * before NUM_PATTERNS. 
+ */ + typedef enum { + ZERO_RUN, SIGN_EXTENDED_4_BITS, SIGN_EXTENDED_1_BYTE, + SIGN_EXTENDED_HALFWORD, ZERO_PADDED_HALFWORD, + SIGN_EXTENDED_TWO_HALFWORDS, REP_BYTES, UNCOMPRESSED, + NUM_PATTERNS + } PatternNumber; + + /** + * Number of bits of the zero run size bitfield. If the size of the + * zero run reaches the maximum value, it is split into ZERO_RUN entries. + */ + const int zeroRunSizeBits; + + uint64_t getNumPatterns() const override { return NUM_PATTERNS; } + + std::string + getName(int number) const override + { + static std::map patternNames = { + {ZERO_RUN, "ZERO_RUN"}, + {SIGN_EXTENDED_4_BITS, "SignExtended4Bits"}, + {SIGN_EXTENDED_1_BYTE, "SignExtended1Byte"}, + {SIGN_EXTENDED_HALFWORD, "SignExtendedHalfword"}, + {ZERO_PADDED_HALFWORD, "ZeroPaddedHalfword"}, + {SIGN_EXTENDED_TWO_HALFWORDS, "SignExtendedTwoHalfwords"}, + {REP_BYTES, "RepBytes"}, + {UNCOMPRESSED, "Uncompressed"} + }; + + return patternNames[number]; + }; + + std::unique_ptr getPattern( + const DictionaryEntry& bytes, + const DictionaryEntry& dict_bytes, + const int match_location) const override + { + using PatternFactory = Factory; + return PatternFactory::getPattern(bytes, dict_bytes, match_location); + } + + void addToDictionary(const DictionaryEntry data) override; + + std::unique_ptr + instantiateDictionaryCompData() const override; + + public: + typedef FPCParams Params; + FPC(const Params &p); + ~FPC() = default; +}; + +class FPC::FPCCompData : public DictionaryCompressor::CompData +{ + protected: + /** + * Number of bits of the zero run size bitfield. If the size of the + * zero run reaches the maximum value, it is split into ZERO_RUN entries. + */ + const int zeroRunSizeBits; + + public: + FPCCompData(int zeroRunSizeBits); + ~FPCCompData() = default; + + void addEntry(std::unique_ptr pattern) override; +}; + +// Pattern implementations + +class FPC::ZeroRun : public MaskedValuePattern<0, 0xFFFFFFFF> +{ + private: + /** Run length so far. 
*/ + int _runLength; + + /** + * A zero run consists of a main ZeroRun pattern, which has a meaningful + * real size (i.e., different from zero), and X-1 fake (i.e., they are + * zero-sized, and don't exist in a real implementation) patterns, with X + * being the size of the zero run. + */ + int _realSize; + + public: + ZeroRun(const DictionaryEntry bytes, const int match_location) + : MaskedValuePattern<0, 0xFFFFFFFF>(ZERO_RUN, ZERO_RUN, 3, -1, bytes, + false), + _runLength(0), _realSize(0) + { + } + + std::size_t + getSizeBits() const override + { + return _realSize; + } + + /** + * Get the number of zeros in the run so far. + * + * @return The number of zeros in this run. + */ + int getRunLength() const { return _runLength; } + + /** + * Set the number of zeros in the run so far. + * + * @param length The number of zeros in this run. + */ + void setRunLength(int length) { _runLength = length; } + + /** + * When the real size is set it means that we are adding the main zero + * run pattern. When that happens, the metadata length must also be taken + * into account for the size calculation. + * + * @param size Number of bits used to represent the number of zeros in the + * run.
+ */ + void setRealSize(int size) { _realSize = length + size; } +}; + +class FPC::SignExtended4Bits : public SignExtendedPattern<4> +{ + public: + SignExtended4Bits(const DictionaryEntry bytes, const int match_location) + : SignExtendedPattern<4>(SIGN_EXTENDED_4_BITS, SIGN_EXTENDED_4_BITS, 3, + bytes) + { + } +}; + +class FPC::SignExtended1Byte : public SignExtendedPattern<8> +{ + public: + SignExtended1Byte(const DictionaryEntry bytes, const int match_location) + : SignExtendedPattern<8>(SIGN_EXTENDED_1_BYTE, SIGN_EXTENDED_1_BYTE, 3, + bytes) + { + } +}; + +class FPC::SignExtendedHalfword : public SignExtendedPattern<16> +{ + public: + SignExtendedHalfword(const DictionaryEntry bytes, const int match_location) + : SignExtendedPattern<16>(SIGN_EXTENDED_HALFWORD, SIGN_EXTENDED_HALFWORD, + 3, bytes) + { + } +}; + +class FPC::ZeroPaddedHalfword : public MaskedValuePattern<0, 0x0000FFFF> +{ + public: + ZeroPaddedHalfword(const DictionaryEntry bytes, const int match_location) + : MaskedValuePattern<0, 0x0000FFFF>(ZERO_PADDED_HALFWORD, + ZERO_PADDED_HALFWORD, 3, -1, bytes, false) + { + } +}; + +class FPC::SignExtendedTwoHalfwords : public Pattern +{ + private: + /** These are the bytes that are extended to form the two halfwords. 
*/ + const int8_t extendedBytes[2]; + + public: + SignExtendedTwoHalfwords(const DictionaryEntry bytes, + const int match_location) + : Pattern(SIGN_EXTENDED_TWO_HALFWORDS, SIGN_EXTENDED_TWO_HALFWORDS, 3, + 16, -1, false), + extendedBytes{int8_t(fromDictionaryEntry(bytes) & mask(8)), + int8_t((fromDictionaryEntry(bytes) >> 16) & mask(8))} + { + } + + static bool + isPattern(const DictionaryEntry& bytes, + const DictionaryEntry& dict_bytes, const int match_location) + { + const uint32_t data = fromDictionaryEntry(bytes); + const int16_t halfwords[2] = { + int16_t(data & mask(16)), + int16_t((data >> 16) & mask(16)) + }; + return (halfwords[0] == sext<8>(halfwords[0] & mask(8))) && + (halfwords[1] == sext<8>(halfwords[1] & mask(8))); + } + + DictionaryEntry + decompress(const DictionaryEntry dict_bytes) const override + { + uint16_t halfwords[2] = { + uint16_t(sext<8>(extendedBytes[0]) & mask(16)), + uint16_t(sext<8>(extendedBytes[1]) & mask(16)) + }; + return toDictionaryEntry((halfwords[1] << 16) | halfwords[0]); + } +}; + +class FPC::RepBytes : public RepeatedValuePattern +{ + public: + RepBytes(const DictionaryEntry bytes, const int match_location) + : RepeatedValuePattern(REP_BYTES, REP_BYTES, 3, -1, bytes, + false) + { + } +}; + +class FPC::Uncompressed : public UncompressedPattern +{ + public: + Uncompressed(const DictionaryEntry bytes, const int match_location) + : UncompressedPattern(UNCOMPRESSED, UNCOMPRESSED, 3, -1, bytes) + { + } +}; + +} // namespace Compressor + +#endif //__MEM_CACHE_COMPRESSORS_FPC_HH__