From 81e34b308f3738cc9e88ae3066b5ed47c918c9de Mon Sep 17 00:00:00 2001 From: Javier Bueno Date: Thu, 7 Mar 2019 16:13:03 +0100 Subject: [PATCH] mem-cache: Added the STeMS prefetcher Reference: Stephen Somogyi, Thomas F. Wenisch, Anastasia Ailamaki, and Babak Falsafi. 2009. Spatio-temporal memory streaming. In Proceedings of the 36th annual international symposium on Computer architecture (ISCA '09). ACM, New York, NY, USA, 69-80. Change-Id: I58cea1a7faa9391f8aa4469eb4973feabd31097a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16423 Reviewed-by: Daniel Carvalho Maintainer: Nikos Nikoleris --- src/mem/cache/prefetch/Prefetcher.py | 36 +++ src/mem/cache/prefetch/SConscript | 1 + .../spatio_temporal_memory_streaming.cc | 257 ++++++++++++++++++ .../spatio_temporal_memory_streaming.hh | 199 ++++++++++++++ 4 files changed, 493 insertions(+) create mode 100644 src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc create mode 100644 src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index eacdf78f7..cb0a1c3ee 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -408,3 +408,39 @@ class SBOOEPrefetcher(QueuedPrefetcher): sandbox_entries = Param.Int(1024, "Size of the address buffer") score_threshold_pct = Param.Percent(25, "Min. threshold to issue a \ prefetch. The value is the percentage of sandbox entries to use") + +class STeMSPrefetcher(QueuedPrefetcher): + type = "STeMSPrefetcher" + cxx_class = "STeMSPrefetcher" + cxx_header = "mem/cache/prefetch/spatio_temporal_memory_streaming.hh" + + spatial_region_size = Param.MemorySize("2kB", + "Memory covered by a hot zone") + active_generation_table_entries = Param.MemorySize("64", + "Number of entries in the active generation table") + active_generation_table_assoc = Param.Unsigned(64, + "Associativity of the active generation table") + active_generation_table_indexing_policy = Param.BaseIndexingPolicy( + SetAssociative(entry_size = 1, + assoc = Parent.active_generation_table_assoc, + size = Parent.active_generation_table_entries), + "Indexing policy of the active generation table") + active_generation_table_replacement_policy = Param.BaseReplacementPolicy( + LRURP(), "Replacement policy of the active generation table") + + pattern_sequence_table_entries = Param.MemorySize("16384", + "Number of entries in the pattern sequence table") + pattern_sequence_table_assoc = Param.Unsigned(16384, + "Associativity of the pattern sequence table") + pattern_sequence_table_indexing_policy = Param.BaseIndexingPolicy( + SetAssociative(entry_size = 1, + assoc = Parent.pattern_sequence_table_assoc, + size = Parent.pattern_sequence_table_entries), + "Indexing policy of the pattern sequence table") + pattern_sequence_table_replacement_policy = Param.BaseReplacementPolicy( + LRURP(), "Replacement policy of the pattern sequence table") + + region_miss_order_buffer_entries = Param.Unsigned(131072, + "Number of entries of the Region Miss Order Buffer") + reconstruction_entries = Param.Unsigned(256, + "Number of reconstruction entries") diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript index f533b3c52..5b653382a 100644 --- a/src/mem/cache/prefetch/SConscript +++ b/src/mem/cache/prefetch/SConscript @@ -43,5 +43,6 @@ Source('sbooe.cc') Source('signature_path.cc') Source('signature_path_v2.cc') Source('slim_ampm.cc') +Source('spatio_temporal_memory_streaming.cc') Source('stride.cc') Source('tagged.cc') diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc new file mode 100644 index 000000000..df5190a97 --- /dev/null +++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc @@ -0,0 +1,257 @@ +/** + * Copyright (c) 2019 Metempsy Technology Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Javier Bueno + */ + +#include "mem/cache/prefetch/spatio_temporal_memory_streaming.hh" + +#include "debug/HWPrefetch.hh" +#include "mem/cache/prefetch/associative_set_impl.hh" +#include "params/STeMSPrefetcher.hh" + +STeMSPrefetcher::STeMSPrefetcher(const STeMSPrefetcherParams *p) + : QueuedPrefetcher(p), spatialRegionSize(p->spatial_region_size), + spatialRegionSizeBits(floorLog2(p->spatial_region_size)), + reconstructionEntries(p->reconstruction_entries), + activeGenerationTable(p->active_generation_table_assoc, + p->active_generation_table_entries, + p->active_generation_table_indexing_policy, + p->active_generation_table_replacement_policy, + ActiveGenerationTableEntry( + spatialRegionSize / blkSize)), + patternSequenceTable(p->pattern_sequence_table_assoc, + p->pattern_sequence_table_entries, + p->pattern_sequence_table_indexing_policy, + p->pattern_sequence_table_replacement_policy, + ActiveGenerationTableEntry( + spatialRegionSize / blkSize)), + rmob(p->region_miss_order_buffer_entries), rmobHead(0) +{ + fatal_if(!isPowerOf2(spatialRegionSize), + "The spatial region size must be a power of 2."); +} + +void +STeMSPrefetcher::checkForActiveGenerationsEnd() { + // This prefetcher operates attached to the L1 and it observes all + // accesses, this guarantees that no evictions are missed + + // Iterate over all entries, if any recorded cacheline has been evicted, + // the generation finishes, move the entry to the PST + for (auto &agt_entry : activeGenerationTable) { + if (agt_entry.isValid()) { + bool generation_ended = false; + bool sr_is_secure = agt_entry.isSecure(); + for (auto &seq_entry : agt_entry.sequence) { + if (seq_entry.counter > 0) { + Addr cache_addr = + agt_entry.paddress + seq_entry.offset * blkSize; + if (!inCache(cache_addr, sr_is_secure) && + !inMissQueue(cache_addr, sr_is_secure)) { + generation_ended = true; + break; + } + } + } + if (generation_ended) { + // PST is indexed using the PC (secure bit is unused) + ActiveGenerationTableEntry *pst_entry = + patternSequenceTable.findEntry(agt_entry.pc, + false /*unused*/); + if (pst_entry == nullptr) { + // Tipically an entry will not exist + pst_entry = patternSequenceTable.findVictim(agt_entry.pc); + assert(pst_entry != nullptr); + patternSequenceTable.insertEntry(agt_entry.pc, + false /*unused*/, pst_entry); + } else { + patternSequenceTable.accessEntry(pst_entry); + } + // If the entry existed, this will update the values, if not, + // this also sets the values of the entry + pst_entry->update(agt_entry); + // Free the AGT entry + agt_entry.setInvalid(); + } + } + } +} + +void +STeMSPrefetcher::addToRMOB(Addr sr_addr, Addr pst_addr, unsigned int delta) +{ + RegionMissOrderBufferEntry &rmob_entry = rmob[rmobHead]; + rmobHead = (rmobHead + 1) % rmob.size(); + + rmob_entry.srAddress = sr_addr; + rmob_entry.pstAddress = pst_addr; + rmob_entry.delta = delta; + rmob_entry.valid = true; +} + +void +STeMSPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses) +{ + if (!pfi.hasPC()) { + DPRINTF(HWPrefetch, "Ignoring request with no PC.\n"); + return; + } + + Addr pc = pfi.getPC(); + bool is_secure = pfi.isSecure(); + // Spatial region address + Addr sr_addr = pfi.getAddr() / spatialRegionSize; + Addr paddr = pfi.getPaddr(); + + // Offset in cachelines within the spatial region + Addr sr_offset = (pfi.getAddr() % spatialRegionSize) / blkSize; + + // Check if any active generation has ended + checkForActiveGenerationsEnd(); + + ActiveGenerationTableEntry *agt_entry = + activeGenerationTable.findEntry(sr_addr, is_secure); + if (agt_entry != nullptr) { + // found an entry in the AGT, entry is currently being recorded, + // add the offset + activeGenerationTable.accessEntry(agt_entry); + agt_entry->addOffset(sr_offset); + lastTriggerCounter += 1; + } else { + // Not found, this is the first access (Trigger access) + + // Add entry to RMOB + Addr pst_addr = (pc << spatialRegionSizeBits) + sr_offset; + addToRMOB(sr_addr, pst_addr, lastTriggerCounter); + // Reset last trigger counter + lastTriggerCounter = 0; + + // allocate a new AGT entry + agt_entry = activeGenerationTable.findVictim(sr_addr); + assert(agt_entry != nullptr); + activeGenerationTable.insertEntry(sr_addr, is_secure, agt_entry); + agt_entry->pc = pc; + agt_entry->paddress = paddr; + agt_entry->addOffset(sr_offset); + } + // increase the seq Counter for other entries + for (auto &agt_e : activeGenerationTable) { + if (agt_e.isValid() && agt_entry != &agt_e) { + agt_e.seqCounter += 1; + } + } + + // Prefetch generation: if this is a miss, search for the most recent + // entry in the RMOB, and reconstruct the registered access sequence + if (pfi.isCacheMiss()) { + for (unsigned int idx = (rmobHead - 1) % rmob.size(); + idx != rmobHead && rmob[idx].valid; + idx = (idx - 1) % rmob.size()) + { + if (rmob[idx].srAddress == sr_addr) { + // reconstruct the access sequence + reconstructSequence(idx, addresses); + break; + } + } + } +} + +void +STeMSPrefetcher::reconstructSequence(unsigned int rmob_idx, + std::vector &addresses) +{ + std::vector reconstruction(reconstructionEntries, MaxAddr); + unsigned int idx = 0; + // process rmob entries from rmob_idx (most recent with + // address = sr_addr) to the last one (rmobHead) + for (int i = rmob_idx; + i != rmobHead && idx < reconstructionEntries; + i = (i + 1) % rmob.size()) + { + reconstruction[idx] = rmob[i].srAddress * spatialRegionSize; + unsigned int next_i = (i + 1) % rmob.size(); + idx += rmob[next_i].delta + 1; + } + // Now query the PST with the PC of each RMOB entry + idx = 0; + for (int i = rmob_idx; + i != rmobHead && idx < reconstructionEntries; + i = (i + 1) % rmob.size()) + { + ActiveGenerationTableEntry *pst_entry = + patternSequenceTable.findEntry(rmob[i].pstAddress, + false /* unused */); + if (pst_entry != nullptr) { + patternSequenceTable.accessEntry(pst_entry); + for (auto &seq_entry : pst_entry->sequence) { + if (seq_entry.counter > 1) { + // 3-bit counter: high enough confidence with a + // value greater than 1 + Addr rec_addr = rmob[i].srAddress * spatialRegionSize + + seq_entry.offset; + unsigned ridx = idx + seq_entry.delta; + // Try to use the corresponding position, if it has been + // already used, look the surrounding positions + if (ridx < reconstructionEntries && + reconstruction[ridx] == MaxAddr) { + reconstruction[ridx] = rec_addr; + } else if ((ridx + 1) < reconstructionEntries && + reconstruction[ridx + 1] == MaxAddr) { + reconstruction[ridx + 1] = rec_addr; + } else if ((ridx + 2) < reconstructionEntries && + reconstruction[ridx + 2] == MaxAddr) { + reconstruction[ridx + 2] = rec_addr; + } else if ((ridx > 0) && + ((ridx - 1) < reconstructionEntries) && + reconstruction[ridx - 1] == MaxAddr) { + reconstruction[ridx - 1] = rec_addr; + } else if ((ridx > 1) && + ((ridx - 2) < reconstructionEntries) && + reconstruction[ridx - 2] == MaxAddr) { + reconstruction[ridx - 2] = rec_addr; + } + } + } + } + unsigned int next_i = (i + 1) % rmob.size(); + idx += rmob[next_i].delta + 1; + } + for (Addr pf_addr : reconstruction) { + if (pf_addr != MaxAddr) { + addresses.push_back(AddrPriority(pf_addr, 0)); + } + } +} + +STeMSPrefetcher * +STeMSPrefetcherParams::create() +{ + return new STeMSPrefetcher(this); +} diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh new file mode 100644 index 000000000..a7e25fe02 --- /dev/null +++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh @@ -0,0 +1,199 @@ +/** + * Copyright (c) 2019 Metempsy Technology Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Javier Bueno + */ + + /** + * Implementation of the Spatio-Temporal Memory Streaming Prefetcher (STeMS) + * Reference: + * Spatio-temporal memory streaming. + * Somogyi, S., Wenisch, T. F., Ailamaki, A., & Falsafi, B. (2009). + * ACM SIGARCH Computer Architecture News, 37(3), 69-80. + * + * Notes: + * - The functionality described in the paper as Streamed Value Buffer (SVB) + * is not implemented here, as this is handled by the QueuedPrefetcher class + */ + +#ifndef __MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__ +#define __MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__ + +#include + +#include "mem/cache/prefetch/associative_set.hh" +#include "mem/cache/prefetch/queued.hh" + +struct STeMSPrefetcherParams; + +class STeMSPrefetcher : public QueuedPrefetcher +{ + /** Size of each spatial region */ + const size_t spatialRegionSize; + /** log_2 of the spatial region size */ + const size_t spatialRegionSizeBits; + /** Number of reconstruction entries */ + const unsigned int reconstructionEntries; + + /** + * Entry data type for the Active Generation Table (AGT) and the Pattern + * Sequence Table (PST) + */ + struct ActiveGenerationTableEntry : public TaggedEntry { + /** Physical address of the spatial region */ + Addr paddress; + /** PC that started this generation */ + Addr pc; + /** Counter to keep track of the interleaving between sequences */ + unsigned int seqCounter; + + /** Sequence entry data type */ + struct SequenceEntry { + /** 2-bit confidence counter */ + unsigned int counter; + /** Offset, in cache lines, within the spatial region */ + unsigned int offset; + /** Intearleaving position on the global access sequence */ + unsigned int delta; + SequenceEntry() : counter(0), offset(0), delta(0) + {} + }; + /** Sequence of accesses */ + std::vector sequence; + + ActiveGenerationTableEntry(int num_positions) : paddress(0), pc(0), + seqCounter(0), sequence(num_positions) + {} + + void reset() override + { + paddress = 0; + pc = 0; + seqCounter = 0; + for (auto &seq_entry : sequence) { + seq_entry.counter = 0; + seq_entry.offset = 0; + seq_entry.delta = 0; + } + } + + /** + * Update the entry data with an entry from a generation that just + * ended. This operation can not be done with the copy constructor, + * becasuse the TaggedEntry component must not be copied. + * @param e entry which generation has ended + */ + void update(ActiveGenerationTableEntry const &e) + { + paddress = e.paddress; + pc = e.pc; + seqCounter = e.seqCounter; + sequence = e.sequence; + } + + /** + * Add a new access to the sequence + * @param offset offset in cachelines within the spatial region + */ + void addOffset(unsigned int offset) { + // Search for the offset in the deltas array, if it exist, update + // the corresponding counter, if not, add the offset to the array + for (auto &seq_entry : sequence) { + if (seq_entry.counter > 0) { + if (seq_entry.offset == offset) { + //2 bit counter, saturates at 3 + if (seq_entry.counter < 3) { + seq_entry.counter += 1; + } + } + } else { + // If the counter is 0 it means that this position is not + // being used, and we can allocate the new offset here + seq_entry.counter = 1; + seq_entry.offset = offset; + seq_entry.delta = seqCounter; + break; + } + } + seqCounter = 0; + } + }; + + /** Active Generation Table (AGT) */ + AssociativeSet activeGenerationTable; + /** Pattern Sequence Table (PST) */ + AssociativeSet patternSequenceTable; + + /** Data type of the Region Miss Order Buffer entry */ + struct RegionMissOrderBufferEntry { + /** Address of the spatial region */ + Addr srAddress; + /** + * Address used to index the PST table, generated using the PC and the + * offset within the spatial region + */ + Addr pstAddress; + /** Delta within the global miss order sequence */ + unsigned int delta; + /** Valid bit */ + bool valid; + }; + + /** Region Miss Order Buffer (RMOB) */ + std::vector rmob; + /** First free position (or older, if it is full) of the RMOB */ + unsigned int rmobHead; + + /** Counter to keep the count of accesses between trigger accesses */ + unsigned int lastTriggerCounter; + + /** Checks if the active generations have ended */ + void checkForActiveGenerationsEnd(); + /** + * Adds an entry to the RMOB + * @param sr_addr Spatial region address + * @param pst_addr Corresponding PST address + * @param delta Number of entries skipped in the global miss order + */ + void addToRMOB(Addr sr_addr, Addr pst_addr, unsigned int delta); + + /** + * Reconstructs a sequence of accesses and generates the prefetch + * addresses, adding them to the addresses vector + * @param rmob_idx rmob position to start generating from + * @param addresses vector to add the addresses to be prefetched + */ + void reconstructSequence(unsigned int rmob_idx, + std::vector &addresses); + public: + STeMSPrefetcher(const STeMSPrefetcherParams* p); + ~STeMSPrefetcher() {} + void calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses) override; +}; + +#endif//__MEM_CACHE_PREFETCH_SPATIO_TEMPORAL_MEMORY_STREAMING_HH__ -- 2.30.2