9e0e4df804b5dacca98ddd2658fe8d29c8ddfb31
[gem5.git] / src / mem / cache / compressors / dictionary_compressor.hh
1 /*
2 * Copyright (c) 2018-2019 Inria
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Daniel Carvalho
29 */
30
31 /** @file
32 * Definition of a dictionary based cache compressor. Each entry is compared
33 * against a dictionary to search for matches.
34 *
35 * The dictionary is composed of 32-bit entries, and the comparison is done
36 * byte per byte.
37 *
38 * The patterns are implemented as individual classes that have a checking
39 * function isPattern(), to determine if the data fits the pattern, and a
40 * decompress() function, which decompresses the contents of a pattern.
41 * Every new pattern must inherit from the Pattern class and be added to the
42 * patternFactory.
43 */
44
45 #ifndef __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__
46 #define __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__
47
48 #include <array>
49 #include <cstdint>
50 #include <map>
51 #include <memory>
52 #include <string>
53 #include <vector>
54
55 #include "base/types.hh"
56 #include "mem/cache/compressors/base.hh"
57
58 struct BaseDictionaryCompressorParams;
59
60 class BaseDictionaryCompressor : public BaseCacheCompressor
61 {
62 protected:
63 /** Dictionary size. */
64 const std::size_t dictionarySize;
65
66 /** Number of valid entries in the dictionary. */
67 std::size_t numEntries;
68
69 /**
70 * @defgroup CompressionStats Compression specific statistics.
71 * @{
72 */
73
74 /** Number of data entries that were compressed to each pattern. */
75 Stats::Vector patternStats;
76
77 /**
78 * @}
79 */
80
81 /**
82 * Trick function to get the number of patterns.
83 *
84 * @return The number of defined patterns.
85 */
86 virtual uint64_t getNumPatterns() const = 0;
87
88 /**
89 * Get meta-name assigned to the given pattern.
90 *
91 * @param number The number of the pattern.
92 * @return The meta-name of the pattern.
93 */
94 virtual std::string getName(int number) const = 0;
95
96 public:
97 typedef BaseDictionaryCompressorParams Params;
98 BaseDictionaryCompressor(const Params *p);
99 ~BaseDictionaryCompressor() = default;
100
101 void regStats() override;
102 };
103
104 /**
105 * A template version of the dictionary compressor that allows to choose the
106 * dictionary size.
107 *
108 * @tparam The type of a dictionary entry (e.g., uint16_t, uint32_t, etc).
109 */
110 template <class T>
111 class DictionaryCompressor : public BaseDictionaryCompressor
112 {
113 protected:
114 /**
115 * Compression data for the dictionary compressor. It consists of a vector
116 * of patterns.
117 */
118 class CompData;
119
120 // Forward declaration of a pattern
121 class Pattern;
122 class UncompressedPattern;
123
124 /** Convenience typedef for a dictionary entry. */
125 typedef std::array<uint8_t, sizeof(T)> DictionaryEntry;
126
127 /**
128 * Create a factory to determine if input matches a pattern. The if else
129 * chains are constructed by recursion. The patterns should be explored
130 * sorted by size for correct behaviour.
131 */
132 template <class Head, class... Tail>
133 struct Factory
134 {
135 static std::unique_ptr<Pattern> getPattern(
136 const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
137 const int match_location)
138 {
139 // If match this pattern, instantiate it. If a negative match
140 // location is used, the patterns that use the dictionary bytes
141 // must return false. This is used when there are no dictionary
142 // entries yet
143 if (Head::isPattern(bytes, dict_bytes, match_location)) {
144 return std::unique_ptr<Pattern>(
145 new Head(bytes, match_location));
146 // Otherwise, go for next pattern
147 } else {
148 return Factory<Tail...>::getPattern(bytes, dict_bytes,
149 match_location);
150 }
151 }
152 };
153
154 /**
155 * Specialization to end the recursion. This must be called when all
156 * other patterns failed, and there is no choice but to leave data
157 * uncompressed. As such, this pattern must inherit from the uncompressed
158 * pattern.
159 */
160 template <class Head>
161 struct Factory<Head>
162 {
163 static_assert(std::is_base_of<UncompressedPattern, Head>::value,
164 "The last pattern must always be derived from the uncompressed "
165 "pattern.");
166
167 static std::unique_ptr<Pattern>
168 getPattern(const DictionaryEntry& bytes,
169 const DictionaryEntry& dict_bytes, const int match_location)
170 {
171 return std::unique_ptr<Pattern>(new Head(bytes, match_location));
172 }
173 };
174
175 /** The dictionary. */
176 std::vector<DictionaryEntry> dictionary;
177
178 /**
179 * Since the factory cannot be instantiated here, classes that inherit
180 * from this base class have to implement the call to their factory's
181 * getPattern.
182 */
183 virtual std::unique_ptr<Pattern>
184 getPattern(const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
185 const int match_location) const = 0;
186
187 /**
188 * Compress data.
189 *
190 * @param data Data to be compressed.
191 * @return The pattern this data matches.
192 */
193 std::unique_ptr<Pattern> compressValue(const T data);
194
195 /**
196 * Decompress a pattern into a value that fits in a dictionary entry.
197 *
198 * @param pattern The pattern to be decompressed.
199 * @return The decompressed word.
200 */
201 T decompressValue(const Pattern* pattern);
202
203 /** Clear all dictionary entries. */
204 void resetDictionary();
205
206 /**
207 * Add an entry to the dictionary.
208 *
209 * @param data The new entry.
210 */
211 virtual void addToDictionary(const DictionaryEntry data) = 0;
212
213 /**
214 * Apply compression.
215 *
216 * @param data The cache line to be compressed.
217 * @return Cache line after compression.
218 */
219 std::unique_ptr<BaseCacheCompressor::CompressionData> compress(
220 const uint64_t* data);
221
222 /**
223 * Decompress data.
224 *
225 * @param comp_data Compressed cache line.
226 * @param data The cache line to be decompressed.
227 */
228 void decompress(const CompressionData* comp_data, uint64_t* data) override;
229
230 /**
231 * Turn a value into a dictionary entry.
232 *
233 * @param value The value to turn.
234 * @return A dictionary entry containing the value.
235 */
236 static DictionaryEntry toDictionaryEntry(T value);
237
238 /**
239 * Turn a dictionary entry into a value.
240 *
241 * @param The dictionary entry to turn.
242 * @return The value that the dictionary entry contained.
243 */
244 static T fromDictionaryEntry(const DictionaryEntry& entry);
245
246 public:
247 typedef BaseDictionaryCompressorParams Params;
248 DictionaryCompressor(const Params *p);
249 ~DictionaryCompressor() = default;
250 };
251
252 /**
253 * The compressed data is composed of multiple pattern entries. To add a new
254 * pattern one should inherit from this class and implement isPattern() and
255 * decompress(). Then the new pattern must be added to the PatternFactory
256 * declaration in crescent order of size (in the DictionaryCompressor class).
257 */
258 template <class T>
259 class DictionaryCompressor<T>::Pattern
260 {
261 protected:
262 /** Pattern enum number. */
263 const int patternNumber;
264
265 /** Code associated to the pattern. */
266 const uint8_t code;
267
268 /** Length, in bits, of the code and match location. */
269 const uint8_t length;
270
271 /** Number of unmatched bytes. */
272 const uint8_t numUnmatchedBytes;
273
274 /** Index representing the the match location. */
275 const int matchLocation;
276
277 /** Wether the pattern allocates a dictionary entry or not. */
278 const bool allocate;
279
280 public:
281 /**
282 * Default constructor.
283 *
284 * @param number Pattern number.
285 * @param code Code associated to this pattern.
286 * @param metadata_length Length, in bits, of the code and match location.
287 * @param num_unmatched_bytes Number of unmatched bytes.
288 * @param match_location Index of the match location.
289 */
290 Pattern(const int number, const uint64_t code,
291 const uint64_t metadata_length, const uint64_t num_unmatched_bytes,
292 const int match_location, const bool allocate = true)
293 : patternNumber(number), code(code), length(metadata_length),
294 numUnmatchedBytes(num_unmatched_bytes),
295 matchLocation(match_location), allocate(allocate)
296 {
297 }
298
299 /** Default destructor. */
300 virtual ~Pattern() = default;
301
302 /**
303 * Get enum number associated to this pattern.
304 *
305 * @return The pattern enum number.
306 */
307 int getPatternNumber() const { return patternNumber; };
308
309 /**
310 * Get code of this pattern.
311 *
312 * @return The code.
313 */
314 uint8_t getCode() const { return code; }
315
316 /**
317 * Get the index of the dictionary match location.
318 *
319 * @return The index of the match location.
320 */
321 uint8_t getMatchLocation() const { return matchLocation; }
322
323 /**
324 * Get size, in bits, of the pattern (excluding prefix). Corresponds to
325 * unmatched_data_size + code_length.
326 *
327 * @return The size.
328 */
329 std::size_t
330 getSizeBits() const
331 {
332 return numUnmatchedBytes*CHAR_BIT + length;
333 }
334
335 /**
336 * Determine if pattern allocates a dictionary entry.
337 *
338 * @return True if should allocate a dictionary entry.
339 */
340 bool shouldAllocate() const { return allocate; }
341
342 /**
343 * Extract pattern's information to a string.
344 *
345 * @return A string containing the relevant pattern metadata.
346 */
347 std::string
348 print() const
349 {
350 return csprintf("pattern %s (encoding %x, size %u bits)",
351 getPatternNumber(), getCode(), getSizeBits());
352 }
353
354 /**
355 * Decompress the pattern. Each pattern has its own way of interpreting
356 * its data.
357 *
358 * @param dict_bytes The bytes in the corresponding matching entry.
359 * @return The decompressed pattern.
360 */
361 virtual DictionaryEntry decompress(
362 const DictionaryEntry dict_bytes) const = 0;
363 };
364
365 template <class T>
366 class DictionaryCompressor<T>::CompData : public CompressionData
367 {
368 public:
369 /** The patterns matched in the original line. */
370 std::vector<std::unique_ptr<Pattern>> entries;
371
372 CompData();
373 ~CompData() = default;
374
375 /**
376 * Add a pattern entry to the list of patterns.
377 *
378 * @param entry The new pattern entry.
379 */
380 virtual void addEntry(std::unique_ptr<Pattern>);
381 };
382
383 /**
384 * A pattern containing the original uncompressed data. This should be the
385 * worst case of every pattern factory, where if all other patterns fail,
386 * an instance of this pattern is created.
387 */
388 template <class T>
389 class DictionaryCompressor<T>::UncompressedPattern
390 : public DictionaryCompressor<T>::Pattern
391 {
392 private:
393 /** A copy of the original data. */
394 const DictionaryEntry data;
395
396 public:
397 UncompressedPattern(const int number,
398 const uint64_t code,
399 const uint64_t metadata_length,
400 const int match_location,
401 const DictionaryEntry bytes)
402 : DictionaryCompressor<T>::Pattern(number, code, metadata_length,
403 sizeof(T), match_location, true),
404 data(bytes)
405 {
406 }
407
408 static bool
409 isPattern(const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
410 const int match_location)
411 {
412 // An entry can always be uncompressed
413 return true;
414 }
415
416 DictionaryEntry
417 decompress(const DictionaryEntry dict_bytes) const override
418 {
419 return data;
420 }
421 };
422
423 #endif //__MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__