From 4628d87e3abbcad0c5a95b6a4562b8ac8c6f4661 Mon Sep 17 00:00:00 2001 From: Ivan Pizarro Date: Mon, 25 Feb 2019 14:30:55 +0100 Subject: [PATCH] mem-cache: Proactive Instruction Fetch Implementation Ferdman, M., Kaynak, C., & Falsafi, B. (2011, December). Proactive instruction fetch. In Proceedings of the 44th Annual IEEE/ACM International Symposium on Microarchitecture (pp. 152-162). ACM. Change-Id: I38c3ab30a94ab279f03e3d5936ce8ed118310c0e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16968 Reviewed-by: Nikos Nikoleris Reviewed-by: Daniel Carvalho Maintainer: Nikos Nikoleris --- src/mem/cache/prefetch/Prefetcher.py | 40 +++++ src/mem/cache/prefetch/SConscript | 1 + src/mem/cache/prefetch/pif.cc | 259 +++++++++++++++++++++++++++ src/mem/cache/prefetch/pif.hh | 191 ++++++++++++++++++++ 4 files changed, 491 insertions(+) create mode 100644 src/mem/cache/prefetch/pif.cc create mode 100644 src/mem/cache/prefetch/pif.hh diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index cb0a1c3ee..404a44240 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -43,6 +43,7 @@ from m5.SimObject import * from m5.params import * from m5.proxy import * +from m5.objects.BaseCPU import BaseCPU from m5.objects.ClockedObject import ClockedObject from m5.objects.IndexingPolicies import * from m5.objects.ReplacementPolicies import * @@ -444,3 +445,42 @@ class STeMSPrefetcher(QueuedPrefetcher): "Number of entries of the Region Miss Order Buffer") reconstruction_entries = Param.Unsigned(256, "Number of reconstruction entries") + +class HWPProbeEventRetiredInsts(HWPProbeEvent): + def register(self): + if self.obj: + for name in self.names: + self.prefetcher.getCCObject().addEventProbeRetiredInsts( + self.obj.getCCObject(), name) + +class PIFPrefetcher(QueuedPrefetcher): + type = 'PIFPrefetcher' + cxx_class = 'PIFPrefetcher' + cxx_header = "mem/cache/prefetch/pif.hh" + cxx_exports = [ + PyBindMethod("addEventProbeRetiredInsts"), + ] + + prec_spatial_region_bits = Param.Unsigned(2, + "Number of preceding addresses in the spatial region") + succ_spatial_region_bits = Param.Unsigned(8, + "Number of subsequent addresses in the spatial region") + compactor_entries = Param.Unsigned(2, "Entries in the temp. compactor") + stream_address_buffer_entries = Param.Unsigned(7, "Entries in the SAB") + history_buffer_size = Param.Unsigned(16, "Entries in the history buffer") + + index_entries = Param.MemorySize("64", + "Number of entries in the index") + index_assoc = Param.Unsigned(64, + "Associativity of the index") + index_indexing_policy = Param.BaseIndexingPolicy( + SetAssociative(entry_size = 1, assoc = Parent.index_assoc, + size = Parent.index_entries), + "Indexing policy of the index") + index_replacement_policy = Param.BaseReplacementPolicy(LRURP(), + "Replacement policy of the index") + + def listenFromProbeRetiredInstructions(self, simObj): + if not isinstance(simObj, BaseCPU): + raise TypeError("argument must be of BaseCPU type") + self.addEvent(HWPProbeEventRetiredInsts(self, simObj,"RetiredInstsPC")) diff --git a/src/mem/cache/prefetch/SConscript b/src/mem/cache/prefetch/SConscript index 5b653382a..0dae74e27 100644 --- a/src/mem/cache/prefetch/SConscript +++ b/src/mem/cache/prefetch/SConscript @@ -38,6 +38,7 @@ Source('bop.cc') Source('delta_correlating_prediction_tables.cc') Source('irregular_stream_buffer.cc') Source('indirect_memory.cc') +Source('pif.cc') Source('queued.cc') Source('sbooe.cc') Source('signature_path.cc') diff --git a/src/mem/cache/prefetch/pif.cc b/src/mem/cache/prefetch/pif.cc new file mode 100644 index 000000000..04064d497 --- /dev/null +++ b/src/mem/cache/prefetch/pif.cc @@ -0,0 +1,259 @@ +/** + * Copyright (c) 2019 Metempsy Technology Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ivan Pizarro + */ + +#include "mem/cache/prefetch/pif.hh" + +#include +#include + +#include "debug/HWPrefetch.hh" +#include "mem/cache/prefetch/associative_set_impl.hh" +#include "params/PIFPrefetcher.hh" + +PIFPrefetcher::PIFPrefetcher(const PIFPrefetcherParams *p) + : QueuedPrefetcher(p), + precSize(p->prec_spatial_region_bits), + succSize(p->succ_spatial_region_bits), + maxCompactorEntries(p->compactor_entries), + maxStreamAddressBufferEntries(p->stream_address_buffer_entries), + historyBuffer(p->history_buffer_size), + historyBufferTail(0), + index(p->index_assoc, p->index_entries, p->index_indexing_policy, + p->index_replacement_policy), + streamAddressBuffer(), listenersPC() +{ +} + +PIFPrefetcher::CompactorEntry::CompactorEntry(Addr addr, + unsigned int prec_size, unsigned int succ_size) +{ + trigger = addr; + prec.resize(prec_size, false); + succ.resize(succ_size, false); +} + +unsigned int +PIFPrefetcher::CompactorEntry::distanceFromTrigger(Addr target, + unsigned int log_blk_size) const { + const Addr target_blk = target >> log_blk_size; + const Addr trigger_blk = trigger >> log_blk_size; + + return std::abs(target_blk - trigger_blk); +} + +bool +PIFPrefetcher::CompactorEntry::inSameSpatialRegion(Addr pc, + unsigned int log_blk_size, bool update) +{ + unsigned int blk_distance = distanceFromTrigger(pc, log_blk_size); + + bool hit = (pc > trigger) ? + (succ.size() >= blk_distance) : (prec.size() >= blk_distance); + if (hit && update) { + if (pc > trigger) { + succ[blk_distance - 1] = true; + } else if (pc < trigger) { + prec[blk_distance - 1] = true; + } + } + return hit; +} + +bool +PIFPrefetcher::CompactorEntry::hasAddress(Addr target, + unsigned int log_blk_size) const +{ + unsigned int blk_distance = distanceFromTrigger(target, log_blk_size); + bool hit = false; + if (target > trigger) { + hit = blk_distance <= succ.size() && succ[blk_distance - 1]; + } else if (target < trigger) { + hit = blk_distance <= prec.size() && succ[blk_distance - 1]; + } else { + hit = true; + } + return hit; +} + +void +PIFPrefetcher::CompactorEntry::getPredictedAddresses(unsigned int log_blk_size, + std::vector &addresses) const +{ + // Calculate the addresses of the instruction blocks that are encoded + // by the bit vector and issue prefetch requests for these addresses. + // Predictions are made by traversing the bit vector from left to right + // as this typically predicts the accesses in the order they will be + // issued in the core. + const Addr trigger_blk = trigger >> log_blk_size; + for (int i = prec.size()-1; i >= 0; i--) { + // Address from the preceding blocks to issue a prefetch + if (prec[i]) { + const Addr prec_addr = (trigger_blk - (i+1)) << log_blk_size; + addresses.push_back(AddrPriority(prec_addr, 0)); + } + } + for (int i = 0; i < succ.size(); i++) { + // Address from the succeding blocks to issue a prefetch + if (succ[i]) { + const Addr succ_addr = (trigger_blk + (i+1)) << log_blk_size; + addresses.push_back(AddrPriority(succ_addr, 0)); + } + } +} + +void +PIFPrefetcher::notifyRetiredInst(const Addr pc) +{ + // First access to the prefetcher + if (temporalCompactor.size() == 0) { + spatialCompactor = CompactorEntry(pc, precSize, succSize); + } else { + // If the PC of the instruction retired is in the same spatial region + // than the last trigger address, update the bit vectors based on the + // distance between them + if (spatialCompactor.inSameSpatialRegion(pc, lBlkSize, true)) { + // If the PC of the instruction retired is outside the latest spatial + // region, check if it matches in any of the regions in the temporal + // compactor and update it to the MRU position + } else { + bool is_in_temporal_compactor = false; + + // Check if the PC is in the temporal compactor + for (auto it = temporalCompactor.begin(); + it != temporalCompactor.end(); it++) + { + if (it->inSameSpatialRegion(pc, lBlkSize, false)) { + spatialCompactor = (*it); + temporalCompactor.erase(it); + is_in_temporal_compactor = true; + break; + } + } + + if (temporalCompactor.size() == maxCompactorEntries) { + temporalCompactor.pop_front(); // Discard the LRU entry + } + + temporalCompactor.push_back(spatialCompactor); + + // If the compactor entry is neither the spatial or can't be + // found in the temporal compactor, reset the spatial compactor + // updating the trigger address and resetting the vector bits + if (!is_in_temporal_compactor) { + // Insert the spatial entry into the history buffer and update + // the 'index' table to point to the new entry + historyBuffer[historyBufferTail] = spatialCompactor; + + IndexEntry *idx_entry = + index.findEntry(spatialCompactor.trigger, false); + if (idx_entry != nullptr) { + index.accessEntry(idx_entry); + } else { + idx_entry = index.findVictim(spatialCompactor.trigger); + assert(idx_entry != nullptr); + index.insertEntry(spatialCompactor.trigger, false, + idx_entry); + } + idx_entry->historyIndex = historyBufferTail; + + historyBufferTail++; + if (historyBufferTail == historyBuffer.size()) { + historyBufferTail = 0; + } + + // Reset the spatial compactor fields with the new address + spatialCompactor = CompactorEntry(pc, precSize, succSize); + } + } + } +} + +void +PIFPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses) +{ + const Addr addr = pfi.getAddr(); + + // First check if the access has been prefetched, this is done by + // comparing the access against the active Stream Address Buffers + for (auto &sabEntry : streamAddressBuffer) { + if (sabEntry->hasAddress(addr, lBlkSize)) { + // Advance to the next entry (first check if we have reached the + // end of the history buffer) + if (sabEntry == &(historyBuffer[historyBuffer.size() - 1])) { + sabEntry = &(historyBuffer[0]); + } else { + sabEntry++; + } + sabEntry->getPredictedAddresses(lBlkSize, addresses); + // We are done + return; + } + } + + // Check if a valid entry in the 'index' table is found and allocate a new + // active prediction stream + IndexEntry *idx_entry = index.findEntry(addr, /* unused */ false); + + if (idx_entry != nullptr) { + index.accessEntry(idx_entry); + // Trigger address from the 'index' table and index to the history + // buffer + const unsigned int hb_entry = idx_entry->historyIndex; + CompactorEntry *entry = &historyBuffer[hb_entry]; + + // Track the block in the Stream Address Buffer + if (streamAddressBuffer.size() == maxStreamAddressBufferEntries) { + streamAddressBuffer.pop_front(); + } + streamAddressBuffer.push_back(entry); + + entry->getPredictedAddresses(lBlkSize, addresses); + } +} + +void +PIFPrefetcher::PrefetchListenerPC::notify(const Addr& pc) +{ + parent.notifyRetiredInst(pc); +} + +void +PIFPrefetcher::addEventProbeRetiredInsts(SimObject *obj, const char *name) +{ + ProbeManager *pm(obj->getProbeManager()); + listenersPC.push_back(new PrefetchListenerPC(*this, pm, name)); +} + +PIFPrefetcher* +PIFPrefetcherParams::create() +{ + return new PIFPrefetcher(this); +} diff --git a/src/mem/cache/prefetch/pif.hh b/src/mem/cache/prefetch/pif.hh new file mode 100644 index 000000000..6516b2c6a --- /dev/null +++ b/src/mem/cache/prefetch/pif.hh @@ -0,0 +1,191 @@ +/** + * Copyright (c) 2019 Metempsy Technology Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ivan Pizarro + */ + +/** Implementation of the 'Proactive Instruction Fetch' prefetcher + * Reference: + * Ferdman, M., Kaynak, C., & Falsafi, B. (2011, December). + * Proactive instruction fetch. + * In Proceedings of the 44th Annual IEEE/ACM International Symposium + * on Microarchitecture (pp. 152-162). ACM. + */ + +#ifndef __MEM_CACHE_PREFETCH_PIF_HH__ +#define __MEM_CACHE_PREFETCH_PIF_HH__ + +#include +#include + +#include "mem/cache/prefetch/associative_set.hh" +#include "mem/cache/prefetch/queued.hh" + +struct PIFPrefetcherParams; + +class PIFPrefetcher : public QueuedPrefetcher +{ + private: + /** Number of preceding and subsequent spatial addresses to compact */ + const unsigned int precSize; + const unsigned int succSize; + /** Number of entries used for the temporal compactor */ + const unsigned int maxCompactorEntries; + /** Max number of entries to be used in the Stream Address Buffer */ + const unsigned int maxStreamAddressBufferEntries; + + /** + * The compactor tracks retired instructions addresses, leveraging the + * spatial and temporal locality among instructions for compaction. It + *comprises the spatial and temporal compaction mechanisms. + * + * Taking advantage of the spatial locality across instruction blocks, + * the spatial compactor combines instruction-block addresses that fall + * within a 'spatial region', a group of adjacent instruction blocks. + * When an instruction outside the current spatial region retires, the + * existing spatial region is sent to the temporal compactor. + * + * The temporal compactor tracks a small number of the + * most-recently-observed spatial region records. + */ + struct CompactorEntry { + Addr trigger; + std::vector prec; + std::vector succ; + CompactorEntry() {} + CompactorEntry(Addr, unsigned int, unsigned int); + + /** + * Checks if a given address is in the same defined spatial region + * as the compactor entry. + * @param addr Address to check if it's inside the spatial region + * @param log_blk_distance log_2(block size of the cache) + * @param update if true, set the corresponding succ/prec entry + * @return TRUE if they are in the same spatial region, FALSE + * otherwise + */ + bool inSameSpatialRegion(Addr addr, unsigned int log_blk_size, + bool update); + /** + * Checks if the provided address is contained in this spatial + * region and if its corresponding bit vector entry is set + * @param target address to check + * @param log_blk_distance log_2(block size of the cache) + * @return TRUE if target has its bit set + */ + bool hasAddress(Addr target, unsigned int log_blk_size) const; + + /** + * Fills the provided vector with the predicted addresses using the + * recorded bit vectors of the entry + * @param log_blk_distance log_2(block size of the cache) + * @param addresses reference to a vector to add the generated + * addresses + */ + void getPredictedAddresses(unsigned int log_blk_size, + std::vector &addresses) const; + private: + /** + * Computes the distance, in cache blocks, from an address to the + * trigger of the entry. + * @param addr address to compute the distance from the trigger + * @param log_blk_distance log_2(block size of the cache) + * @result distance in cache blocks from the address to the trigger + */ + unsigned int distanceFromTrigger(Addr addr, + unsigned int log_blk_size) const; + }; + + CompactorEntry spatialCompactor; + std::deque temporalCompactor; + + /** + * History buffer is a circular buffer that stores the sequence of + * retired instructions in FIFO order. + */ + std::vector historyBuffer; + unsigned int historyBufferTail; + + struct IndexEntry : public TaggedEntry + { + unsigned int historyIndex; + }; + /** + * The index table is a small cache-like structure that facilitates + * fast search of the history buffer. + */ + AssociativeSet index; + + /** + * A Stream Address Buffer (SAB) tracks a window of consecutive + * spatial regions. The SAB mantains a pointer to the sequence in the + * history buffer, initiallly set to the pointer taken from the index + * table + */ + std::deque streamAddressBuffer; + + /** + * Updates the prefetcher structures upon an instruction retired + * @param pc PC of the instruction being retired + */ + void notifyRetiredInst(const Addr pc); + + /** + * Probe Listener to handle probe events from the CPU + */ + class PrefetchListenerPC : public ProbeListenerArgBase + { + public: + PrefetchListenerPC(PIFPrefetcher &_parent, ProbeManager *pm, + const std::string &name) + : ProbeListenerArgBase(pm, name), + parent(_parent) {} + void notify(const Addr& pc) override; + protected: + PIFPrefetcher &parent; + }; + + /** Array of probe listeners */ + std::vector listenersPC; + + + public: + PIFPrefetcher(const PIFPrefetcherParams *p); + ~PIFPrefetcher() {} + + void calculatePrefetch(const PrefetchInfo &pfi, + std::vector &addresses); + + /** + * Add a SimObject and a probe name to monitor the retired instructions + * @param obj The SimObject pointer to listen from + * @param name The probe name + */ + void addEventProbeRetiredInsts(SimObject *obj, const char *name); +}; + +#endif // __MEM_CACHE_PREFETCH_PIF_HH__ -- 2.30.2