mem-ruby: Use CircularQueue for prefetcher's non-unit filter
author Daniel R. Carvalho <odanrc@yahoo.com.br>
Sat, 21 Dec 2019 13:55:30 +0000 (14:55 +0100)
committer Daniel Carvalho <odanrc@yahoo.com.br>
Fri, 9 Oct 2020 07:13:16 +0000 (07:13 +0000)
Ruby prefetcher's non-unit filter is a circular queue, so use the class
created for this functionality.

This changes the behavior: previously the order in which the filter
was iterated was completely arbitrary, whereas now the filter is
iterated from the beginning of the queue to the end when accessing
and updating its contents.

Change-Id: I3148efcbef00da0c8f6cf2dee7fb86f6c2ddb27d
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/24533
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
src/mem/ruby/structures/RubyPrefetcher.cc
src/mem/ruby/structures/RubyPrefetcher.hh

index 02526aa57e518d77b748cf1a3abbaf35eb675ab3..b416269e3650129a808c782591c22ca4282ebcde 100644 (file)
@@ -58,30 +58,14 @@ RubyPrefetcher::RubyPrefetcher(const Params *p)
     : SimObject(p), m_num_streams(p->num_streams),
     m_array(p->num_streams), m_train_misses(p->train_misses),
     m_num_startup_pfs(p->num_startup_pfs),
-    m_num_nonunit_filters(p->nonunit_filter),
     unitFilter(p->unit_filter),
     negativeFilter(p->unit_filter),
-    m_nonunit_filter(p->nonunit_filter, 0),
+    nonUnitFilter(p->nonunit_filter),
     m_prefetch_cross_pages(p->cross_page),
     m_page_shift(p->sys->getPageShift())
 {
     assert(m_num_streams > 0);
     assert(m_num_startup_pfs <= MAX_PF_INFLIGHT);
-
-    // create nonunit stride filter
-    m_nonunit_index = 0;
-    m_nonunit_stride = new int[m_num_nonunit_filters];
-    m_nonunit_hit    = new uint32_t[m_num_nonunit_filters];
-    for (int i =0; i < m_num_nonunit_filters; i++) {
-        m_nonunit_stride[i] = 0;
-        m_nonunit_hit[i]    = 0;
-    }
-}
-
-RubyPrefetcher::~RubyPrefetcher()
-{
-    delete m_nonunit_stride;
-    delete m_nonunit_hit;
 }
 
 void
@@ -180,7 +164,7 @@ RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type)
 
     // check to see if this address is in the non-unit stride filter
     int stride = 0;  // NULL value
-    hit = accessNonunitFilter(address, &stride, alloc);
+    hit = accessNonunitFilter(line_addr, &stride, alloc);
     if (alloc) {
         assert(stride != 0);  // ensure non-zero stride prefetches
         initializeStream(line_addr, stride, getLRUindex(), type);
@@ -265,14 +249,6 @@ RubyPrefetcher::getLRUindex(void)
     return lru_index;
 }
 
-void
-RubyPrefetcher::clearNonunitEntry(uint32_t index)
-{
-    m_nonunit_filter[index] = 0;
-    m_nonunit_stride[index] = 0;
-    m_nonunit_hit[index]    = 0;
-}
-
 void
 RubyPrefetcher::initializeStream(Addr address, int stride,
      uint32_t index, const RubyRequestType& type)
@@ -358,49 +334,46 @@ RubyPrefetcher::accessUnitFilter(CircularQueue<UnitFilterEntry>* const filter,
 }
 
 bool
-RubyPrefetcher::accessNonunitFilter(Addr address, int *stride,
-    bool &alloc)
+RubyPrefetcher::accessNonunitFilter(Addr line_addr, int *stride, bool &alloc)
 {
     //reset the alloc flag
     alloc = false;
 
     /// look for non-unit strides based on a (user-defined) page size
-    Addr page_addr = pageAddress(address);
-    Addr line_addr = makeLineAddress(address);
+    Addr page_addr = pageAddress(line_addr);
 
-    for (uint32_t i = 0; i < m_num_nonunit_filters; i++) {
-        if (pageAddress(m_nonunit_filter[i]) == page_addr) {
+    for (auto& entry : nonUnitFilter) {
+        if (pageAddress(entry.addr) == page_addr) {
             // hit in the non-unit filter
             // compute the actual stride (for this reference)
-            int delta = line_addr - m_nonunit_filter[i];
+            int delta = line_addr - entry.addr;
 
             if (delta != 0) {
                 // no zero stride prefetches
                 // check that the stride matches (for the last N times)
-                if (delta == m_nonunit_stride[i]) {
+                if (delta == entry.stride) {
                     // -> stride hit
                     // increment count (if > 2) allocate stream
-                    m_nonunit_hit[i]++;
-                    if (m_nonunit_hit[i] > m_train_misses) {
+                    entry.hits++;
+                    if (entry.hits > m_train_misses) {
                         // This stride HAS to be the multiplicative constant of
                         // dataBlockBytes (bc makeNextStrideAddress is
                         // calculated based on this multiplicative constant!)
-                        *stride = m_nonunit_stride[i] /
-                                    RubySystem::getBlockSizeBytes();
+                        *stride = entry.stride /
+                            RubySystem::getBlockSizeBytes();
 
                         // clear this filter entry
-                        clearNonunitEntry(i);
+                        entry.clear();
                         alloc = true;
                     }
                 } else {
-                    // delta didn't match ... reset m_nonunit_hit count for
-                    // this entry
-                    m_nonunit_hit[i] = 0;
+                    // If delta didn't match reset entry's hit count
+                    entry.hits = 0;
                 }
 
                 // update the last address seen & the stride
-                m_nonunit_stride[i] = delta;
-                m_nonunit_filter[i] = line_addr;
+                entry.addr = line_addr;
+                entry.stride = delta;
                 return true;
             } else {
                 return false;
@@ -409,14 +382,8 @@ RubyPrefetcher::accessNonunitFilter(Addr address, int *stride,
     }
 
     // not found: enter this address in the table
-    m_nonunit_filter[m_nonunit_index] = line_addr;
-    m_nonunit_stride[m_nonunit_index] = 0;
-    m_nonunit_hit[m_nonunit_index]    = 0;
+    nonUnitFilter.push_back(NonUnitFilterEntry(line_addr));
 
-    m_nonunit_index = m_nonunit_index + 1;
-    if (m_nonunit_index >= m_num_nonunit_filters) {
-        m_nonunit_index = 0;
-    }
     return false;
 }
 
@@ -437,10 +404,10 @@ RubyPrefetcher::print(std::ostream& out) const
 
     // print out non-unit stride filter
     out << "non-unit table:\n";
-    for (int i = 0; i < m_num_nonunit_filters; i++) {
-        out << m_nonunit_filter[i] << " "
-            << m_nonunit_stride[i] << " "
-            << m_nonunit_hit[i] << std::endl;
+    for (const auto& entry : nonUnitFilter) {
+        out << entry.addr << " "
+            << entry.stride << " "
+            << entry.hits << std::endl;
     }
 
     // print out allocated stream buffers
index ebf59bd3b86a8b915ded8731a90f32a9ae914b2c..8e08fdf621f8ba51d41f3d824a59e9163a99f893 100644 (file)
@@ -97,7 +97,7 @@ class RubyPrefetcher : public SimObject
     public:
         typedef RubyPrefetcherParams Params;
         RubyPrefetcher(const Params *p);
-        ~RubyPrefetcher();
+        ~RubyPrefetcher() = default;
 
         void issueNextPrefetch(Addr address, PrefetchEntry *stream);
         /**
@@ -139,6 +139,25 @@ class RubyPrefetcher : public SimObject
             }
         };
 
+        struct NonUnitFilterEntry : public UnitFilterEntry
+        {
+            /** Stride (in # of cache lines). */
+            int stride;
+
+            NonUnitFilterEntry(Addr _addr = 0)
+              : UnitFilterEntry(_addr), stride(0)
+            {
+            }
+
+            void
+            clear()
+            {
+                addr = 0;
+                stride = 0;
+                hits = 0;
+            }
+        };
+
         /**
          * Returns an unused stream buffer (or if all are used, returns the
          * least recently used (accessed) stream buffer).
@@ -146,9 +165,6 @@ class RubyPrefetcher : public SimObject
          */
         uint32_t getLRUindex(void);
 
-        //! clear a non-unit stride prefetcher entry
-        void clearNonunitEntry(uint32_t index);
-
         //! allocate a new stream buffer at a specific index
         void initializeStream(Addr address, int stride,
             uint32_t index, const RubyRequestType& type);
@@ -171,9 +187,17 @@ class RubyPrefetcher : public SimObject
         bool accessUnitFilter(CircularQueue<UnitFilterEntry>* const filter,
             Addr line_addr, int stride, bool &alloc);
 
-        /// access a unit stride filter to determine if there is a hit
-        bool accessNonunitFilter(Addr address, int *stride,
-            bool &alloc);
+        /**
+         * Access a non-unit stride filter to determine if there is a hit, and
+         * update it otherwise.
+         *
+         * @param line_addr Address being accessed, block aligned.
+         * @param stride The stride value.
+         * @param alloc Whether a stream should be allocated on a hit.
+         * @return True if a corresponding entry was found and its stride is
+         *         not zero.
+         */
+        bool accessNonunitFilter(Addr line_addr, int *stride, bool &alloc);
 
         /// determine the page aligned address
         Addr pageAddress(Addr addr) const;
@@ -187,8 +211,6 @@ class RubyPrefetcher : public SimObject
         uint32_t m_train_misses;
         //! number of initial prefetches to startup a stream
         uint32_t m_num_startup_pfs;
-        //! number of non-stride filters
-        uint32_t m_num_nonunit_filters;
 
         /**
          * A unit stride filter array: helps reduce BW requirement
@@ -202,16 +224,11 @@ class RubyPrefetcher : public SimObject
          */
         CircularQueue<UnitFilterEntry> negativeFilter;
 
-        /// a non-unit stride filter array: helps reduce BW requirement of
-        /// prefetching
-        std::vector<Addr> m_nonunit_filter;
-        /// An array of strides (in # of cache lines) for the filter entries
-        int *m_nonunit_stride;
-        /// An array used to count the of times particular filter entries
-        /// have been hit
-        uint32_t *m_nonunit_hit;
-        /// a round robin pointer into the unit filter group
-        uint32_t m_nonunit_index;
+        /**
+         * A non-unit stride filter array: helps reduce BW requirement of
+         * prefetching.
+         */
+        CircularQueue<NonUnitFilterEntry> nonUnitFilter;
 
         /// Used for allowing prefetches across pages.
         bool m_prefetch_cross_pages;