mem-cache: Add a masked const value pattern to compressors
[gem5.git] / src / mem / cache / compressors / dictionary_compressor.hh
1 /*
2 * Copyright (c) 2018-2019 Inria
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Daniel Carvalho
29 */
30
31 /** @file
32 * Definition of a dictionary based cache compressor. Each entry is compared
33 * against a dictionary to search for matches.
34 *
35 * The dictionary is composed of 32-bit entries, and the comparison is done
36 * byte per byte.
37 *
38 * The patterns are implemented as individual classes that have a checking
39 * function isPattern(), to determine if the data fits the pattern, and a
40 * decompress() function, which decompresses the contents of a pattern.
41 * Every new pattern must inherit from the Pattern class and be added to the
42 * patternFactory.
43 */
44
45 #ifndef __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__
46 #define __MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__
47
48 #include <array>
49 #include <cstdint>
50 #include <map>
51 #include <memory>
52 #include <string>
53 #include <vector>
54
55 #include "base/types.hh"
56 #include "mem/cache/compressors/base.hh"
57
58 struct BaseDictionaryCompressorParams;
59
60 class BaseDictionaryCompressor : public BaseCacheCompressor
61 {
62 protected:
63 /** Dictionary size. */
64 const std::size_t dictionarySize;
65
66 /** Number of valid entries in the dictionary. */
67 std::size_t numEntries;
68
69 /**
70 * @defgroup CompressionStats Compression specific statistics.
71 * @{
72 */
73
74 /** Number of data entries that were compressed to each pattern. */
75 Stats::Vector patternStats;
76
77 /**
78 * @}
79 */
80
81 /**
82 * Trick function to get the number of patterns.
83 *
84 * @return The number of defined patterns.
85 */
86 virtual uint64_t getNumPatterns() const = 0;
87
88 /**
89 * Get meta-name assigned to the given pattern.
90 *
91 * @param number The number of the pattern.
92 * @return The meta-name of the pattern.
93 */
94 virtual std::string getName(int number) const = 0;
95
96 public:
97 typedef BaseDictionaryCompressorParams Params;
98 BaseDictionaryCompressor(const Params *p);
99 ~BaseDictionaryCompressor() = default;
100
101 void regStats() override;
102 };
103
104 /**
105 * A template version of the dictionary compressor that allows to choose the
106 * dictionary size.
107 *
108 * @tparam The type of a dictionary entry (e.g., uint16_t, uint32_t, etc).
109 */
110 template <class T>
111 class DictionaryCompressor : public BaseDictionaryCompressor
112 {
113 protected:
114 /** Convenience typedef for a dictionary entry. */
115 typedef std::array<uint8_t, sizeof(T)> DictionaryEntry;
116
117 /**
118 * Compression data for the dictionary compressor. It consists of a vector
119 * of patterns.
120 */
121 class CompData;
122
123 // Forward declaration of a pattern
124 class Pattern;
125 class UncompressedPattern;
126 template <T mask>
127 class MaskedPattern;
128 template <T value, T mask>
129 class MaskedValuePattern;
130
131 /**
132 * Create a factory to determine if input matches a pattern. The if else
133 * chains are constructed by recursion. The patterns should be explored
134 * sorted by size for correct behaviour.
135 */
136 template <class Head, class... Tail>
137 struct Factory
138 {
139 static std::unique_ptr<Pattern> getPattern(
140 const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
141 const int match_location)
142 {
143 // If match this pattern, instantiate it. If a negative match
144 // location is used, the patterns that use the dictionary bytes
145 // must return false. This is used when there are no dictionary
146 // entries yet
147 if (Head::isPattern(bytes, dict_bytes, match_location)) {
148 return std::unique_ptr<Pattern>(
149 new Head(bytes, match_location));
150 // Otherwise, go for next pattern
151 } else {
152 return Factory<Tail...>::getPattern(bytes, dict_bytes,
153 match_location);
154 }
155 }
156 };
157
158 /**
159 * Specialization to end the recursion. This must be called when all
160 * other patterns failed, and there is no choice but to leave data
161 * uncompressed. As such, this pattern must inherit from the uncompressed
162 * pattern.
163 */
164 template <class Head>
165 struct Factory<Head>
166 {
167 static_assert(std::is_base_of<UncompressedPattern, Head>::value,
168 "The last pattern must always be derived from the uncompressed "
169 "pattern.");
170
171 static std::unique_ptr<Pattern>
172 getPattern(const DictionaryEntry& bytes,
173 const DictionaryEntry& dict_bytes, const int match_location)
174 {
175 return std::unique_ptr<Pattern>(new Head(bytes, match_location));
176 }
177 };
178
179 /** The dictionary. */
180 std::vector<DictionaryEntry> dictionary;
181
182 /**
183 * Since the factory cannot be instantiated here, classes that inherit
184 * from this base class have to implement the call to their factory's
185 * getPattern.
186 */
187 virtual std::unique_ptr<Pattern>
188 getPattern(const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
189 const int match_location) const = 0;
190
191 /**
192 * Compress data.
193 *
194 * @param data Data to be compressed.
195 * @return The pattern this data matches.
196 */
197 std::unique_ptr<Pattern> compressValue(const T data);
198
199 /**
200 * Decompress a pattern into a value that fits in a dictionary entry.
201 *
202 * @param pattern The pattern to be decompressed.
203 * @return The decompressed word.
204 */
205 T decompressValue(const Pattern* pattern);
206
207 /** Clear all dictionary entries. */
208 void resetDictionary();
209
210 /**
211 * Add an entry to the dictionary.
212 *
213 * @param data The new entry.
214 */
215 virtual void addToDictionary(const DictionaryEntry data) = 0;
216
217 /**
218 * Apply compression.
219 *
220 * @param data The cache line to be compressed.
221 * @return Cache line after compression.
222 */
223 std::unique_ptr<BaseCacheCompressor::CompressionData> compress(
224 const uint64_t* data);
225
226 /**
227 * Decompress data.
228 *
229 * @param comp_data Compressed cache line.
230 * @param data The cache line to be decompressed.
231 */
232 void decompress(const CompressionData* comp_data, uint64_t* data) override;
233
234 /**
235 * Turn a value into a dictionary entry.
236 *
237 * @param value The value to turn.
238 * @return A dictionary entry containing the value.
239 */
240 static DictionaryEntry toDictionaryEntry(T value);
241
242 /**
243 * Turn a dictionary entry into a value.
244 *
245 * @param The dictionary entry to turn.
246 * @return The value that the dictionary entry contained.
247 */
248 static T fromDictionaryEntry(const DictionaryEntry& entry);
249
250 public:
251 typedef BaseDictionaryCompressorParams Params;
252 DictionaryCompressor(const Params *p);
253 ~DictionaryCompressor() = default;
254 };
255
256 /**
257 * The compressed data is composed of multiple pattern entries. To add a new
258 * pattern one should inherit from this class and implement isPattern() and
259 * decompress(). Then the new pattern must be added to the PatternFactory
260 * declaration in crescent order of size (in the DictionaryCompressor class).
261 */
262 template <class T>
263 class DictionaryCompressor<T>::Pattern
264 {
265 protected:
266 /** Pattern enum number. */
267 const int patternNumber;
268
269 /** Code associated to the pattern. */
270 const uint8_t code;
271
272 /** Length, in bits, of the code and match location. */
273 const uint8_t length;
274
275 /** Number of unmatched bytes. */
276 const uint8_t numUnmatchedBytes;
277
278 /** Index representing the the match location. */
279 const int matchLocation;
280
281 /** Wether the pattern allocates a dictionary entry or not. */
282 const bool allocate;
283
284 public:
285 /**
286 * Default constructor.
287 *
288 * @param number Pattern number.
289 * @param code Code associated to this pattern.
290 * @param metadata_length Length, in bits, of the code and match location.
291 * @param num_unmatched_bytes Number of unmatched bytes.
292 * @param match_location Index of the match location.
293 */
294 Pattern(const int number, const uint64_t code,
295 const uint64_t metadata_length, const uint64_t num_unmatched_bytes,
296 const int match_location, const bool allocate = true)
297 : patternNumber(number), code(code), length(metadata_length),
298 numUnmatchedBytes(num_unmatched_bytes),
299 matchLocation(match_location), allocate(allocate)
300 {
301 }
302
303 /** Default destructor. */
304 virtual ~Pattern() = default;
305
306 /**
307 * Get enum number associated to this pattern.
308 *
309 * @return The pattern enum number.
310 */
311 int getPatternNumber() const { return patternNumber; };
312
313 /**
314 * Get code of this pattern.
315 *
316 * @return The code.
317 */
318 uint8_t getCode() const { return code; }
319
320 /**
321 * Get the index of the dictionary match location.
322 *
323 * @return The index of the match location.
324 */
325 uint8_t getMatchLocation() const { return matchLocation; }
326
327 /**
328 * Get size, in bits, of the pattern (excluding prefix). Corresponds to
329 * unmatched_data_size + code_length.
330 *
331 * @return The size.
332 */
333 std::size_t
334 getSizeBits() const
335 {
336 return numUnmatchedBytes*CHAR_BIT + length;
337 }
338
339 /**
340 * Determine if pattern allocates a dictionary entry.
341 *
342 * @return True if should allocate a dictionary entry.
343 */
344 bool shouldAllocate() const { return allocate; }
345
346 /**
347 * Extract pattern's information to a string.
348 *
349 * @return A string containing the relevant pattern metadata.
350 */
351 std::string
352 print() const
353 {
354 return csprintf("pattern %s (encoding %x, size %u bits)",
355 getPatternNumber(), getCode(), getSizeBits());
356 }
357
358 /**
359 * Decompress the pattern. Each pattern has its own way of interpreting
360 * its data.
361 *
362 * @param dict_bytes The bytes in the corresponding matching entry.
363 * @return The decompressed pattern.
364 */
365 virtual DictionaryEntry decompress(
366 const DictionaryEntry dict_bytes) const = 0;
367 };
368
369 template <class T>
370 class DictionaryCompressor<T>::CompData : public CompressionData
371 {
372 public:
373 /** The patterns matched in the original line. */
374 std::vector<std::unique_ptr<Pattern>> entries;
375
376 CompData();
377 ~CompData() = default;
378
379 /**
380 * Add a pattern entry to the list of patterns.
381 *
382 * @param entry The new pattern entry.
383 */
384 virtual void addEntry(std::unique_ptr<Pattern>);
385 };
386
387 /**
388 * A pattern containing the original uncompressed data. This should be the
389 * worst case of every pattern factory, where if all other patterns fail,
390 * an instance of this pattern is created.
391 */
392 template <class T>
393 class DictionaryCompressor<T>::UncompressedPattern
394 : public DictionaryCompressor<T>::Pattern
395 {
396 private:
397 /** A copy of the original data. */
398 const DictionaryEntry data;
399
400 public:
401 UncompressedPattern(const int number,
402 const uint64_t code,
403 const uint64_t metadata_length,
404 const int match_location,
405 const DictionaryEntry bytes)
406 : DictionaryCompressor<T>::Pattern(number, code, metadata_length,
407 sizeof(T), match_location, true),
408 data(bytes)
409 {
410 }
411
412 static bool
413 isPattern(const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
414 const int match_location)
415 {
416 // An entry can always be uncompressed
417 return true;
418 }
419
420 DictionaryEntry
421 decompress(const DictionaryEntry dict_bytes) const override
422 {
423 return data;
424 }
425 };
426
427 /**
428 * A pattern that compares masked values against dictionary entries. If
429 * the masked dictionary entry matches perfectly the masked value to be
430 * compressed, there is a pattern match.
431 *
432 * For example, if the mask is 0xFF00 (that is, this pattern matches the MSB),
433 * the value (V) 0xFF20 is being compressed, and the dictionary contains
434 * the value (D) 0xFF03, this is a match (V & mask == 0xFF00 == D & mask),
435 * and 0x0020 is added to the list of unmatched bits.
436 *
437 * @tparam mask A mask containing the bits that must match.
438 */
439 template <class T>
440 template <T mask>
441 class DictionaryCompressor<T>::MaskedPattern
442 : public DictionaryCompressor<T>::Pattern
443 {
444 private:
445 static_assert(mask != 0, "The pattern's value mask must not be zero. Use "
446 "the uncompressed pattern instead.");
447
448 /** A copy of the bits that do not belong to the mask. */
449 const T bits;
450
451 public:
452 MaskedPattern(const int number,
453 const uint64_t code,
454 const uint64_t metadata_length,
455 const int match_location,
456 const DictionaryEntry bytes,
457 const bool allocate = true)
458 : DictionaryCompressor<T>::Pattern(number, code, metadata_length,
459 popCount(~mask) / 8, match_location, allocate),
460 bits(DictionaryCompressor<T>::fromDictionaryEntry(bytes) & ~mask)
461 {
462 }
463
464 static bool
465 isPattern(const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
466 const int match_location)
467 {
468 const T masked_bytes =
469 DictionaryCompressor<T>::fromDictionaryEntry(bytes) & mask;
470 const T masked_dict_bytes =
471 DictionaryCompressor<T>::fromDictionaryEntry(dict_bytes) & mask;
472 return (match_location >= 0) && (masked_bytes == masked_dict_bytes);
473 }
474
475 DictionaryEntry
476 decompress(const DictionaryEntry dict_bytes) const override
477 {
478 const T masked_dict_bytes =
479 DictionaryCompressor<T>::fromDictionaryEntry(dict_bytes) & mask;
480 return DictionaryCompressor<T>::toDictionaryEntry(
481 bits | masked_dict_bytes);
482 }
483 };
484
485 /**
486 * A pattern that compares masked values to a masked portion of a fixed value.
487 * If all the masked bits match the provided non-dictionary value, there is a
488 * pattern match.
489 *
490 * For example, assume the mask is 0xFF00 (that is, this pattern matches the
491 * MSB), and we are searching for data containing only ones (i.e., the fixed
492 * value is 0xFFFF).
493 * If the value (V) 0xFF20 is being compressed, this is a match (V & mask ==
494 * 0xFF00 == 0xFFFF & mask), and 0x20 is added to the list of unmatched bits.
495 * If the value (V2) 0x0120 is being compressed, this is not a match
496 * ((V2 & mask == 0x0100) != (0xFF00 == 0xFFFF & mask).
497 *
498 * @tparam value The value that is being matched against.
499 * @tparam mask A mask containing the bits that must match the given value.
500 */
501 template <class T>
502 template <T value, T mask>
503 class DictionaryCompressor<T>::MaskedValuePattern
504 : public MaskedPattern<mask>
505 {
506 private:
507 static_assert(mask != 0, "The pattern's value mask must not be zero.");
508
509 public:
510 MaskedValuePattern(const int number,
511 const uint64_t code,
512 const uint64_t metadata_length,
513 const int match_location,
514 const DictionaryEntry bytes,
515 const bool allocate = false)
516 : MaskedPattern<mask>(number, code, metadata_length, match_location,
517 bytes, allocate)
518 {
519 }
520
521 static bool
522 isPattern(const DictionaryEntry& bytes, const DictionaryEntry& dict_bytes,
523 const int match_location)
524 {
525 // Compare the masked fixed value to the value being checked for
526 // patterns. Since the dictionary is not being used the match_location
527 // is irrelevant.
528 const T masked_bytes =
529 DictionaryCompressor<T>::fromDictionaryEntry(bytes) & mask;
530 return ((value & mask) == masked_bytes);
531 }
532
533 DictionaryEntry
534 decompress(const DictionaryEntry dict_bytes) const override
535 {
536 return MaskedPattern<mask>::decompress(
537 DictionaryCompressor<T>::toDictionaryEntry(value));
538 }
539 };
540
541 #endif //__MEM_CACHE_COMPRESSORS_DICTIONARY_COMPRESSOR_HH__