Michaud, P. (2015, June). A best-offset prefetcher.
In 2nd Data Prefetching Championship.
Change-Id: I61bb89ca5639356d54aeb04e856d5bf6e8805c22
Reviewed-on: https://gem5-review.googlesource.com/c/14820
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
writeAllocator->allocate() : mshr->allocOnFill();
blk = handleFill(pkt, blk, writebacks, allocate);
assert(blk != nullptr);
+ ppFill->notify(pkt);
}
if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
{
ppHit = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Hit");
ppMiss = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Miss");
+ ppFill = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Fill");
}
///////////////
/** To probe when a cache miss occurs */
ProbePointArg<PacketPtr> *ppMiss;
+ /** To probe when a cache fill occurs */
+ ProbePointArg<PacketPtr> *ppFill;
+
/**
* The writeAllocator drive optimizations for streaming writes.
* It first determines whether a WriteReq MSHR should be delayed,
dcpt = Param.DeltaCorrelatingPredictionTables(
SlimDeltaCorrelatingPredictionTables(),
"Delta Correlating Prediction Tables object")
+
+class BOPPrefetcher(QueuedPrefetcher):
+ type = "BOPPrefetcher"
+ cxx_class = "BOPPrefetcher"
+ cxx_header = "mem/cache/prefetch/bop.hh"
+ score_max = Param.Unsigned(31, "Max. score to update the best offset")
+ round_max = Param.Unsigned(100, "Max. round to update the best offset")
+ bad_score = Param.Unsigned(10, "Score at which the HWP is disabled")
+ rr_size = Param.Unsigned(64, "Number of entries of each RR bank")
+ tag_bits = Param.Unsigned(12, "Bits used to store the tag")
+ offset_list_size = Param.Unsigned(46,
+ "Number of entries in the offsets list")
+ negative_offsets_enable = Param.Bool(True,
+ "Initialize the offsets list also with negative values \
+ (i.e. the table will have half of the entries with positive \
+ offsets and the other half with negative ones)")
+ delay_queue_enable = Param.Bool(True, "Enable the delay queue")
+ delay_queue_size = Param.Unsigned(15,
+ "Number of entries in the delay queue")
+ delay_queue_cycles = Param.Cycles(60,
+ "Cycles to delay a write in the left RR table from the delay \
+ queue")
Source('access_map_pattern_matching.cc')
Source('base.cc')
+Source('bop.cc')
Source('delta_correlating_prediction_tables.cc')
Source('irregular_stream_buffer.cc')
Source('queued.cc')
void
BasePrefetcher::PrefetchListener::notify(const PacketPtr &pkt)
{
- parent.probeNotify(pkt);
+ if (isFill) {
+ parent.notifyFill(pkt);
+ } else {
+ parent.probeNotify(pkt);
+ }
}
BasePrefetcher::BasePrefetcher(const BasePrefetcherParams *p)
if (listeners.empty() && cache != nullptr) {
ProbeManager *pm(cache->getProbeManager());
listeners.push_back(new PrefetchListener(*this, pm, "Miss"));
+ listeners.push_back(new PrefetchListener(*this, pm, "Fill", true));
if (prefetchOnAccess) {
listeners.push_back(new PrefetchListener(*this, pm, "Hit"));
}
{
public:
PrefetchListener(BasePrefetcher &_parent, ProbeManager *pm,
- const std::string &name)
+ const std::string &name, bool _isFill = false)
: ProbeListenerArgBase(pm, name),
- parent(_parent) {}
+ parent(_parent), isFill(_isFill) {}
void notify(const PacketPtr &pkt) override;
protected:
BasePrefetcher &parent;
+ bool isFill;
};
std::vector<PrefetchListener *> listeners;
*/
virtual void notify(const PacketPtr &pkt, const PrefetchInfo &pfi) = 0;
+ /** Notify prefetcher of cache fill */
+ virtual void notifyFill(const PacketPtr &pkt)
+ {}
+
virtual PacketPtr getPacket() = 0;
virtual Tick nextPrefetchReadyTime() const = 0;
--- /dev/null
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+#include "mem/cache/prefetch/bop.hh"
+
+#include "debug/HWPrefetch.hh"
+#include "params/BOPPrefetcher.hh"
+
+BOPPrefetcher::BOPPrefetcher(const BOPPrefetcherParams *p)
+ : QueuedPrefetcher(p),
+ scoreMax(p->score_max), roundMax(p->round_max),
+ badScore(p->bad_score), rrEntries(p->rr_size),
+ tagMask((1 << p->tag_bits) - 1),
+ delayQueueEnabled(p->delay_queue_enable),
+ delayQueueSize(p->delay_queue_size),
+ delayTicks(cyclesToTicks(p->delay_queue_cycles)),
+ delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
+ issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
+ bestScore(0), round(0)
+{
+ if (!isPowerOf2(rrEntries)) {
+ fatal("%s: number of RR entries is not power of 2\n", name());
+ }
+ if (!isPowerOf2(blkSize)) {
+ fatal("%s: cache line size is not power of 2\n", name());
+ }
+ if (!(p->negative_offsets_enable && (p->offset_list_size % 2 == 0))) {
+ fatal("%s: negative offsets enabled with odd offset list size\n",
+ name());
+ }
+
+ rrLeft.resize(rrEntries);
+ rrRight.resize(rrEntries);
+
+ // Following the paper implementation, a list with the specified number
+ // of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
+ const int factors[] = { 2, 3, 5 };
+ unsigned int i = 0;
+ int64_t offset_i = 1;
+
+ while (i < p->offset_list_size)
+ {
+ int64_t offset = offset_i;
+
+ for (int n : factors) {
+ while ((offset % n) == 0) {
+ offset /= n;
+ }
+ }
+
+ if (offset == 1) {
+ offsetsList.push_back(OffsetListEntry(offset_i, 0));
+ i++;
+ // If we want to use negative offsets, add also the negative value
+ // of the offset just calculated
+ if (p->negative_offsets_enable) {
+ offsetsList.push_back(OffsetListEntry(-offset_i, 0));
+ i++;
+ }
+ }
+
+ offset_i++;
+ }
+
+ offsetsListIterator = offsetsList.begin();
+}
+
+void
+BOPPrefetcher::delayQueueEventWrapper()
+{
+ while (!delayQueue.empty() &&
+ delayQueue.front().processTick <= curTick())
+ {
+ Addr addr_x = delayQueue.front().baseAddr;
+ insertIntoRR(addr_x, RRWay::Left);
+ delayQueue.pop_front();
+ }
+
+ // Schedule an event for the next element if there is one
+ if (!delayQueue.empty()) {
+ schedule(delayQueueEvent, delayQueue.front().processTick);
+ }
+}
+
+unsigned int
+BOPPrefetcher::hash(Addr addr, unsigned int way) const
+{
+ Addr hash1 = addr >> way;
+ Addr hash2 = hash1 >> floorLog2(rrEntries);
+ return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
+}
+
+void
+BOPPrefetcher::insertIntoRR(Addr addr, unsigned int way)
+{
+ switch (way) {
+ case RRWay::Left:
+ rrLeft[hash(addr, RRWay::Left)] = addr;
+ break;
+ case RRWay::Right:
+ rrRight[hash(addr, RRWay::Right)] = addr;
+ break;
+ }
+}
+
+void
+BOPPrefetcher::insertIntoDelayQueue(Addr x)
+{
+ if (delayQueue.size() == delayQueueSize) {
+ return;
+ }
+
+ // Add the address to the delay queue and schedule an event to process
+ // it after the specified delay cycles
+ Tick process_tick = curTick() + delayTicks;
+
+ delayQueue.push_back(DelayQueueEntry(x, process_tick));
+
+ if (!delayQueueEvent.scheduled()) {
+ schedule(delayQueueEvent, process_tick);
+ }
+}
+
+void
+BOPPrefetcher::resetScores()
+{
+ for (auto& it : offsetsList) {
+ it.second = 0;
+ }
+}
+
+inline Addr
+BOPPrefetcher::tag(Addr addr) const
+{
+ return (addr >> blkSize) & tagMask;
+}
+
+bool
+BOPPrefetcher::testRR(Addr addr) const
+{
+ for (auto& it : rrLeft) {
+ if (it == addr) {
+ return true;
+ }
+ }
+
+ for (auto& it : rrRight) {
+ if (it == addr) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void
+BOPPrefetcher::bestOffsetLearning(Addr x)
+{
+ Addr offset_addr = (*offsetsListIterator).first;
+ Addr lookup_addr = x - offset_addr;
+
+ // There was a hit in the RR table, increment the score for this offset
+ if (testRR(lookup_addr)) {
+ DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
+ (*offsetsListIterator).second++;
+ if ((*offsetsListIterator).second > bestScore) {
+ bestScore = (*offsetsListIterator).second;
+ phaseBestOffset = (*offsetsListIterator).first;
+ DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
+ }
+ }
+
+ offsetsListIterator++;
+
+ // All the offsets in the list were visited meaning that a learning
+ // phase finished. Check if
+ if (offsetsListIterator == offsetsList.end()) {
+ offsetsListIterator = offsetsList.begin();
+ round++;
+
+ // Check if the best offset must be updated if:
+ // (1) One of the scores equals SCORE_MAX
+ // (2) The number of rounds equals ROUND_MAX
+ if ((bestScore >= scoreMax) || (round == roundMax)) {
+ bestOffset = phaseBestOffset;
+ round = 0;
+ bestScore = 0;
+ phaseBestOffset = 0;
+ resetScores();
+ issuePrefetchRequests = true;
+ } else if (phaseBestOffset <= badScore) {
+ issuePrefetchRequests = false;
+ }
+ }
+}
+
+void
+BOPPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
+ std::vector<AddrPriority> &addresses)
+{
+ Addr addr = pfi.getAddr();
+ Addr tag_x = tag(addr);
+
+ if (delayQueueEnabled) {
+ insertIntoDelayQueue(tag_x);
+ } else {
+ insertIntoRR(tag_x, RRWay::Left);
+ }
+
+ // Go through the nth offset and update the score, the best score and the
+ // current best offset if a better one is found
+ bestOffsetLearning(tag_x);
+
+ // This prefetcher is a degree 1 prefetch, so it will only generate one
+ // prefetch at most per access
+ if (issuePrefetchRequests) {
+ Addr prefetch_addr = addr + (bestOffset << lBlkSize);
+ addresses.push_back(AddrPriority(prefetch_addr, 0));
+ DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
+ }
+}
+
+void
+BOPPrefetcher::notifyFill(const PacketPtr& pkt)
+{
+ // Only insert into the RR right way if it's the pkt is a HWP
+ if (!pkt->cmd.isHWPrefetch()) return;
+
+ Addr tag_y = tag(pkt->getAddr());
+
+ if (issuePrefetchRequests) {
+ insertIntoRR(tag_y - bestOffset, RRWay::Right);
+ }
+}
+
+BOPPrefetcher*
+BOPPrefetcherParams::create()
+{
+ return new BOPPrefetcher(this);
+}
--- /dev/null
+/**
+ * Copyright (c) 2018 Metempsy Technology Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Ivan Pizarro
+ */
+
+/**
+ * Implementation of the 'A Best-Offset Prefetcher'
+ * Reference:
+ * Michaud, P. (2015, June). A best-offset prefetcher.
+ * In 2nd Data Prefetching Championship.
+ */
+
+#ifndef __MEM_CACHE_PREFETCH_BOP_HH__
+#define __MEM_CACHE_PREFETCH_BOP_HH__
+
+#include <queue>
+
+#include "mem/cache/prefetch/queued.hh"
+#include "mem/packet.hh"
+
+struct BOPPrefetcherParams;
+
+class BOPPrefetcher : public QueuedPrefetcher
+{
+ private:
+
+ enum RRWay {
+ Left,
+ Right
+ };
+
+ /** Learning phase parameters */
+ const unsigned int scoreMax;
+ const unsigned int roundMax;
+ const unsigned int badScore;
+ /** Recent requests table parameteres */
+ const unsigned int rrEntries;
+ const unsigned int tagMask;
+ /** Delay queue parameters */
+ const bool delayQueueEnabled;
+ const unsigned int delayQueueSize;
+ const unsigned int delayTicks;
+
+ std::vector<Addr> rrLeft;
+ std::vector<Addr> rrRight;
+
+ /** Structure to save the offset and the score */
+ typedef std::pair<int16_t, uint8_t> OffsetListEntry;
+ std::vector<OffsetListEntry> offsetsList;
+
+ /** In a first implementation of the BO prefetcher, both banks of the
+ * RR were written simultaneously when a prefetched line is inserted
+ * into the cache. Adding the delay queue tries to avoid always
+ * striving for timeless prefetches, which has been found to not
+ * always being optimal.
+ */
+ struct DelayQueueEntry
+ {
+ Addr baseAddr;
+ Tick processTick;
+
+ DelayQueueEntry(Addr x, Tick t) : baseAddr(x), processTick(t)
+ {}
+ };
+
+ std::deque<DelayQueueEntry> delayQueue;
+
+ /** Event to handle the delay queue processing */
+ void delayQueueEventWrapper();
+ EventFunctionWrapper delayQueueEvent;
+
+ /** Hardware prefetcher enabled */
+ bool issuePrefetchRequests;
+ /** Current best offset to issue prefetches */
+ Addr bestOffset;
+ /** Current best offset found in the learning phase */
+ Addr phaseBestOffset;
+ /** Current test offset index */
+ std::vector<OffsetListEntry>::iterator offsetsListIterator;
+ /** Max score found so far */
+ unsigned int bestScore;
+ /** Current round */
+ unsigned int round;
+
+ /** Generate a hash for the specified address to index the RR table
+ * @param addr: address to hash
+ * @param way: RR table to which is addressed (left/right)
+ */
+ unsigned int hash(Addr addr, unsigned int way) const;
+
+ /** Insert the specified address into the RR table
+ * @param addr: address to insert
+ * @param way: RR table to which the address will be inserted
+ */
+ void insertIntoRR(Addr addr, unsigned int way);
+
+ /** Insert the specified address into the delay queue. This will
+ * trigger an event after the delay cycles pass
+ * @param addr: address to insert into the delay queue
+ */
+ void insertIntoDelayQueue(Addr addr);
+
+ /** Reset all the scores from the offset list */
+ void resetScores();
+
+ /** Generate the tag for the specified address based on the tag bits
+ * and the block size
+ * @param addr: address to get the tag from
+ */
+ Addr tag(Addr addr) const;
+
+ /** Test if @X-O is hitting in the RR table to update the
+ offset score */
+ bool testRR(Addr) const;
+
+ /** Learning phase of the BOP. Update the intermediate values of the
+ round and update the best offset if found */
+ void bestOffsetLearning(Addr);
+
+ /** Update the RR right table after a prefetch fill */
+ void notifyFill(const PacketPtr& pkt) override;
+
+ public:
+
+ BOPPrefetcher(const BOPPrefetcherParams *p);
+ ~BOPPrefetcher() {}
+
+ void calculatePrefetch(const PrefetchInfo &pfi,
+ std::vector<AddrPriority> &addresses);
+};
+
+#endif /* __MEM_CACHE_PREFETCH_BOP_HH__ */