a8068c6146593920655d7992709ce66af74286a8
2 * Copyright (c) 2018 Inria
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * Authors: Daniel Carvalho
32 * Implementation of the BDI cache compressor.
35 #include "mem/cache/compressors/bdi.hh"
40 #include <type_traits>
42 #include "debug/CacheComp.hh"
43 #include "params/BDI.hh"
45 // Number of bytes in a qword
46 #define BYTES_PER_QWORD 8
48 // Declare BDI encoding names
49 const char* BDI::ENCODING_NAMES
[] =
50 {"Zero", "Repeated_Values", "Base8_1", "Base8_2", "Base8_4", "Base4_1",
51 "Base4_2", "Base2_1", "Uncompressed"};
53 BDI::BDICompData::BDICompData(const uint8_t encoding
)
54 : CompressionData(), _encoding(encoding
)
59 BDI::BDICompData::getEncoding() const
65 BDI::BDICompData::getName() const
67 return ENCODING_NAMES
[_encoding
];
70 BDI::BDICompDataZeros::BDICompDataZeros()
73 // Calculate compressed size
74 calculateCompressedSize();
78 BDI::BDICompDataZeros::access(const int index
) const
84 BDI::BDICompDataZeros::calculateCompressedSize()
86 // Number of bits used by Encoding
87 std::size_t size
= encodingBits
;
92 BDI::BDICompDataRep::BDICompDataRep(const uint64_t rep_value
)
93 : BDICompData(REP_VALUES
)
98 // Calculate compressed size
99 calculateCompressedSize();
103 BDI::BDICompDataRep::access(const int index
) const
109 BDI::BDICompDataRep::calculateCompressedSize()
111 // Number of bits used by Encoding
112 std::size_t size
= encodingBits
;
114 // Number of bits used by Base
115 size
+= sizeof(base
)*CHAR_BIT
;
120 BDI::BDICompDataUncompressed::BDICompDataUncompressed(
121 const uint64_t* data
, const std::size_t blk_size
)
122 : BDICompData(UNCOMPRESSED
), blkSize(blk_size
),
123 _data(data
, data
+ blk_size
/CHAR_BIT
)
125 // Calculate compressed size
126 calculateCompressedSize();
130 BDI::BDICompDataUncompressed::access(const int index
) const
136 BDI::BDICompDataUncompressed::calculateCompressedSize()
138 // Number of bits used by Encoding
139 std::size_t size
= encodingBits
;
141 // Number of bits used by uncompressed line
142 size
+= blkSize
*CHAR_BIT
;
147 template <class TB
, class TD
>
148 BDI::BDICompDataBaseDelta
<TB
, TD
>::BDICompDataBaseDelta(const uint8_t encoding
,
149 const std::size_t blk_size
, const std::size_t max_num_bases
)
150 : BDICompData(encoding
), maxNumBases(max_num_bases
)
152 // Reserve the maximum possible size for the vectors
153 bases
.reserve(maxNumBases
);
154 bitMask
.reserve(blk_size
/sizeof(TD
));
155 deltas
.reserve(blk_size
/sizeof(TD
));
157 // Push virtual base 0 to bases list
161 template <class TB
, class TD
>
163 BDI::BDICompDataBaseDelta
<TB
, TD
>::calculateCompressedSize()
165 // Number of bits used by Encoding
166 std::size_t size
= encodingBits
;
168 // Number of bits used by BitMask
169 size
+= bitMask
.size()*std::ceil(std::log2(maxNumBases
));
171 // Number of bits used by Bases. bases[0] is implicit in a hardware
172 // implementation, therefore its size is 0
173 size
+= (maxNumBases
-1)*sizeof(TB
)*CHAR_BIT
;
175 // Number of bits used by Deltas
176 size
+= deltas
.size()*sizeof(TD
)*CHAR_BIT
;
178 CompressionData::setSizeBits(size
);
181 template <class TB
, class TD
>
183 BDI::BDICompDataBaseDelta
<TB
, TD
>::addBase(const TB base
)
185 // Can't add base if reached limit of number of bases
186 if (bases
.size() >= maxNumBases
) {
190 // Push new base to end of bases list
191 bases
.push_back(base
);
193 // New delta is 0, as it is a difference between the new base and itself
194 addDelta(bases
.size() - 1, 0);
199 template <class TB
, class TD
>
201 BDI::BDICompDataBaseDelta
<TB
, TD
>::addDelta(const std::size_t base_index
,
204 // Insert new delta with respect to the given base
205 bitMask
.push_back(base_index
);
208 deltas
.push_back(delta
);
211 template <class TB
, class TD
> bool
212 BDI::BDICompDataBaseDelta
<TB
, TD
>::compress(const uint64_t* data
,
213 const std::size_t blk_size
)
215 // Parse through data in a sizeof(TB) granularity
216 for (std::size_t byte_start
= 0; byte_start
< blk_size
;
217 byte_start
+= sizeof(TB
))
221 std::memcpy(&curValue
, ((uint8_t*)data
) + byte_start
,
224 // Iterate through all bases to search for a valid delta
225 bool foundDelta
= false;
226 for (int base_index
= 0; base_index
< bases
.size(); base_index
++) {
227 // Calculate delta relative to currently parsed base
228 typename
std::make_signed
<TB
>::type delta
= curValue
-
231 // Check if the delta is within the limits of the delta size. If
232 // that is the case, add delta to compressed data and keep parsing
234 typename
std::make_signed
<TB
>::type limit
=
235 ULLONG_MAX
>>((BYTES_PER_QWORD
-sizeof(TD
))*CHAR_BIT
+1);
236 if ((delta
>= -limit
) && (delta
<= limit
)) {
237 addDelta(base_index
, delta
);
243 // If we cannot find a base that can accommodate this new line's data,
244 // add this value as the new base and insert its respective delta of 0.
245 // If the new base can't be added, it means that we reached the base
246 // limit, so line is uncompressible using the given encoding
247 if (!foundDelta
&& !addBase(curValue
)) {
252 // Calculate compressed size
253 calculateCompressedSize();
258 template <class TB
, class TD
>
260 BDI::BDICompDataBaseDelta
<TB
, TD
>::access(const int index
) const
262 // We decompress all base-delta pairs that form the 64-bit entry
263 // corresponding to the given 64-bit-array index
266 // Get relationship between the size of an uint64_t base and size of TB
267 const std::size_t size_diff
= sizeof(uint64_t)/sizeof(TB
);
269 // Mask for a base entry
270 const uint64_t mask
= ULLONG_MAX
>>((BYTES_PER_QWORD
-sizeof(TB
))*CHAR_BIT
);
272 // Size, in bits, of a base entry. Cant be const because compiler will
273 // optimize and create a 64 bit instance, which will generate a shift size
275 int base_size_bits
= sizeof(TB
)*CHAR_BIT
;
277 // Concatenate all base-delta entries until they form a 64-bit entry
278 for (int delta_index
= size_diff
* (index
+ 1) - 1;
279 delta_index
>= (int)(size_diff
* index
); delta_index
--) {
280 // Get base and delta entries corresponding to the current delta
281 assert(delta_index
< deltas
.size());
282 const TD delta
= deltas
[delta_index
];
283 const int base_index
= bitMask
[delta_index
];
284 assert(base_index
< bases
.size());
285 const TB base
= bases
[base_index
];
287 // Concatenate decompressed value
288 value
<<= base_size_bits
;
289 value
|= static_cast<uint64_t>((base
+ delta
) & mask
);
295 BDI::BDI(const Params
*p
)
296 : BaseCacheCompressor(p
), useMoreCompressors(p
->use_more_compressors
),
297 qwordsPerCacheLine(blkSize
/BYTES_PER_QWORD
)
299 static_assert(sizeof(ENCODING_NAMES
)/sizeof(char*) == NUM_ENCODINGS
,
300 "Number of encodings doesn't match the number of names");
304 BDI::isZeroPackable(const uint64_t* data
) const
306 return std::all_of(data
, data
+ qwordsPerCacheLine
,
307 [](const uint64_t entry
){ return entry
== 0; });
311 BDI::isSameValuePackable(const uint64_t* data
) const
313 // We don't want to copy the whole array to the lambda expression
314 const uint64_t rep_value
= data
[0];
315 return std::all_of(data
, data
+ qwordsPerCacheLine
,
316 [rep_value
](const uint64_t entry
)
317 {return entry
== rep_value
;});
320 template <class TB
, class TD
>
321 std::unique_ptr
<BDI::BDICompData
>
322 BDI::tryCompress(const uint64_t* data
, const uint8_t encoding
) const
324 // Instantiate compressor
325 auto temp_data
= std::unique_ptr
<BDICompDataBaseDelta
<TB
, TD
>>(
326 new BDICompDataBaseDelta
<TB
, TD
>(encoding
, blkSize
));
328 // Try compressing. Return nullptr if compressor can't compress given line
329 if (temp_data
->compress(data
, blkSize
)) {
330 return std::move(temp_data
);
332 return std::unique_ptr
<BDICompData
>{};
337 BDI::decompress(const BaseCacheCompressor::CompressionData
* comp_data
,
340 // Decompress and go back to host endianness
341 for (std::size_t i
= 0; i
< qwordsPerCacheLine
; i
++)
342 data
[i
] = static_cast<const BDICompData
*>(comp_data
)->access(i
);
345 std::unique_ptr
<BaseCacheCompressor::CompressionData
>
346 BDI::compress(const uint64_t* data
, Cycles
& comp_lat
, Cycles
& decomp_lat
)
348 std::unique_ptr
<BDICompData
> bdi_data
;
350 // Check if it is a zero line
351 if (isZeroPackable(data
)) {
352 bdi_data
= std::unique_ptr
<BDICompData
>(new BDICompDataZeros());
354 // Set compression latency (compare 1 qword per cycle)
355 comp_lat
= Cycles(qwordsPerCacheLine
);
356 // Check if all values in the line are the same
357 } else if (isSameValuePackable(data
)) {
358 bdi_data
= std::unique_ptr
<BDICompData
>(new BDICompDataRep(data
[0]));
360 // Set compression latency (compare 1 qword per cycle)
361 comp_lat
= Cycles(qwordsPerCacheLine
);
363 // Initialize compressed data as an uncompressed instance
364 bdi_data
= std::unique_ptr
<BDICompData
>(
365 new BDICompDataUncompressed(data
, blkSize
));
367 // Base size-delta size ratio. Used to optimize run and try to
368 // compress less combinations when their size is worse than the
369 // current best. It does not reflect the compression ratio, as
370 // it does not take the metadata into account.
371 int base_delta_ratio
= 2;
373 // Check which base-delta size combination is the best. This is
374 // optimized by giving priority to trying the compressor that would
375 // generate the smallest compression size. This way we waste less
376 // simulation time by not doing all possible combinations
377 for (int ratio
= 8; ratio
>= base_delta_ratio
; ratio
/=2) {
378 for (int base_size
= 8; base_size
>= ratio
; base_size
/=2) {
379 // If using more compressors, parse all delta sizes from 1 to
380 // one size smaller than the base size, otherwise just parse
381 // highest possible delta. When we only instantiate one delta
382 // size per base size, we use less area and energy, at the
383 // cost of lower compression efficiency
384 const int delta_size
= base_size
/ratio
;
385 if (!useMoreCompressors
&& delta_size
!= base_size
/2) {
389 std::unique_ptr
<BDICompData
> temp_bdi_data
;
391 // Get the compression result for the current combination
392 if ((base_size
== 8)&&(delta_size
== 4)) {
393 temp_bdi_data
= tryCompress
<uint64_t, int32_t>(data
,
395 } else if ((base_size
== 8)&&(delta_size
== 2)) {
396 temp_bdi_data
= tryCompress
<uint64_t, int16_t>(data
,
398 } else if ((base_size
== 8)&&(delta_size
== 1)) {
399 temp_bdi_data
= tryCompress
<uint64_t, int8_t>(data
,
401 } else if ((base_size
== 4)&&(delta_size
== 2)) {
402 temp_bdi_data
= tryCompress
<uint32_t, int16_t>(data
,
404 } else if ((base_size
== 4)&&(delta_size
== 1)) {
405 temp_bdi_data
= tryCompress
<uint32_t, int8_t>(data
,
407 } else if ((base_size
== 2)&&(delta_size
== 1)) {
408 temp_bdi_data
= tryCompress
<uint16_t, int8_t>(data
,
411 fatal("Invalid combination of base and delta sizes.");
414 // If compressor was successful, check if new compression
415 // improves the compression factor
417 (bdi_data
->getSizeBits() > temp_bdi_data
->getSizeBits()))
419 bdi_data
= std::move(temp_bdi_data
);
420 base_delta_ratio
= ratio
;
423 // Clear temp pointer
424 temp_bdi_data
.reset(nullptr);
428 // Set compression latency. A successful compressor will stop all
429 // other compressors when it knows no other will generate a better
430 // compression. However, this requires either hard-coding, or a complex
431 // function that can calculate the exact size of every compressor for
432 // every cache line size. We decide to take a conservative
433 // optimization: assume that all compressors with a given base size
434 // delta size ratio (no-metadata ratio) must wait for each other.
435 // In the worst case scenario the data is left uncompressed, so
436 // it loses the time of the worst base size (2 bytes per cycle)
437 comp_lat
= Cycles(blkSize
/base_delta_ratio
);
441 encodingStats
[bdi_data
->getEncoding()]++;
443 // Pack compression results (1 extra cycle)
444 comp_lat
+= Cycles(1);
446 // Set decompression latency (latency of an adder)
447 decomp_lat
= Cycles(1);
449 // Print debug information
450 DPRINTF(CacheComp
, "BDI: Compressed cache line to encoding %s (%d bits)\n",
451 bdi_data
->getName(), bdi_data
->getSizeBits());
453 return std::move(bdi_data
);
459 BaseCacheCompressor::regStats();
461 // We store the frequency of each encoding
464 .name(name() + ".encoding")
465 .desc("Number of data entries that were compressed to this encoding.")
468 for (unsigned i
= 0; i
< NUM_ENCODINGS
; ++i
) {
469 encodingStats
.subname(i
, ENCODING_NAMES
[i
]);
470 encodingStats
.subdesc(i
, "Number of data entries that match " \
471 "encoding " + std::string(ENCODING_NAMES
[i
]));
478 return new BDI(this);
481 #undef BYTES_PER_QWORD