#include "arch/tlb.hh"
#include "arch/utility.hh"
#include "arch/vtophys.hh"
+#include "base/random.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
#include "cpu/base.hh"
cacheBlocked = false;
priorityList.clear();
- fetchQueue.clear();
// Setup PC and nextPC with initial state.
for (ThreadID tid = 0; tid < numThreads; ++tid) {
fetchBufferPC[tid] = 0;
fetchBufferValid[tid] = false;
+ fetchQueue[tid].clear();
+
priorityList.push_back(tid);
}
* drain other components).
*/
for (ThreadID i = 0; i < numThreads; ++i) {
- if (!(fetchStatus[i] == Idle ||
- (fetchStatus[i] == Blocked && stalls[i].drain)))
+ // Verify fetch queues are drained
+ if (!fetchQueue[i].empty())
return false;
- }
- // Not drained if fetch queue contains entries
- if (!fetchQueue.empty())
- return false;
+        // A thread counts as drained only if it is Idle, or Blocked
+        // while waiting on a drain stall
+ if (fetchStatus[i] != Idle) {
+ if (fetchStatus[i] == Blocked && stalls[i].drain)
+ continue;
+ else
+ return false;
+ }
+ }
/* The pipeline might start up again in the middle of the drain
* cycle if the finish translation event is scheduled, so make
}
}
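+// When a thread is deactivated, drop it from the fetch priority list
+// so the thread-selection policies no longer consider it.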
+template <class Impl>
+void
+DefaultFetch<Impl>::deactivateThread(ThreadID tid)
+{
+ // Update priority list
+ auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
+ if (thread_it != priorityList.end()) {
+ priorityList.erase(thread_it);
+ }
+}
+
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(
}
} else {
// Don't send an instruction to decode if we can't handle it.
- if (!(numInst < fetchWidth) || !(fetchQueue.size() < fetchQueueSize)) {
+ if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
assert(!finishTranslationEvent.scheduled());
finishTranslationEvent.setFault(fault);
finishTranslationEvent.setReq(mem_req);
fetchStatus[tid] = Squashing;
-        // Empty fetch queue
+        // Empty this thread's fetch queue
- auto inst_itr = fetchQueue.begin();
- while (inst_itr != fetchQueue.end()) {
- if ((*inst_itr)->threadNumber == tid)
- inst_itr = fetchQueue.erase(inst_itr);
- else
- ++inst_itr;
- }
+ fetchQueue[tid].clear();
// Microops are being squashed, it is not known whether the
// youngest non-squashed microop was marked delayed commit
_status = updateFetchStatus();
}
- // If there was activity this cycle, inform the CPU of it.
- if (wroteToTimeBuffer || cpu->contextSwitch) {
- DPRINTF(Activity, "Activity this cycle.\n");
-
- cpu->activityThisCycle();
- }
-
// Issue the next I-cache request if possible.
for (ThreadID i = 0; i < Impl::MaxThreads; ++i) {
if (issuePipelinedIfetch[i]) {
-    // Send instructions enqueued into the fetch queue to decode.
-    // Limit rate by fetchWidth.  Stall if decode is stalled.
+    // Send instructions enqueued in the per-thread fetch queues to
+    // decode. Limit the rate by decodeWidth. Stall if decode is stalled.
- unsigned instsToDecode = 0;
- while(!fetchQueue.empty() &&
- instsToDecode < decodeWidth &&
- !stalls[fetchQueue.front()->threadNumber].decode) {
- auto inst = fetchQueue.front();
- toDecode->insts[toDecode->size++] = inst;
- DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
- "fetch queue. Fetch queue size: %i.\n",
- inst->threadNumber, inst->seqNum, fetchQueue.size());
- fetchQueue.pop_front();
- instsToDecode++;
+ unsigned insts_to_decode = 0;
+ unsigned available_insts = 0;
+
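+    // Count the instructions that are eligible to go to decode this
+    // cycle: everything queued for an active thread that decode is not
+    // currently stalling.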
+ for (auto tid : *activeThreads) {
+ if (!stalls[tid].decode) {
+ available_insts += fetchQueue[tid].size();
+ }
+ }
+
+    // Pick a random thread to start trying to grab instructions from,
+    // so that no single thread is consistently favored.
+    auto tid_itr = activeThreads->begin();
+    std::advance(tid_itr,
+            random_mt.random<uint8_t>(0, activeThreads->size() - 1));
+
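+    // Round-robin over the active threads from the random starting
+    // point, sending at most decodeWidth instructions in total.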
+ while (available_insts != 0 && insts_to_decode < decodeWidth) {
+ ThreadID tid = *tid_itr;
+ if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
+ auto inst = fetchQueue[tid].front();
+ toDecode->insts[toDecode->size++] = inst;
+ DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "
+ "fetch queue. Fetch queue size: %i.\n",
+ tid, inst->seqNum, fetchQueue[tid].size());
+
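+            // Record activity so the CPU is notified of work done
+            // this cycle.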
+ wroteToTimeBuffer = true;
+ fetchQueue[tid].pop_front();
+ insts_to_decode++;
+ available_insts--;
+ }
+
+ tid_itr++;
+ // Wrap around if at end of active threads list
+ if (tid_itr == activeThreads->end())
+ tid_itr = activeThreads->begin();
+ }
+
+ // If there was activity this cycle, inform the CPU of it.
+ if (wroteToTimeBuffer || cpu->contextSwitch) {
+ DPRINTF(Activity, "Activity this cycle.\n");
+
+ cpu->activityThisCycle();
}
// Reset the number of the instruction we've fetched.
-        // Write the instruction to the first slot in the queue
-        // that heads to decode.
+        // Append the instruction to this thread's fetch queue; tick()
+        // later moves queued instructions to decode.
assert(numInst < fetchWidth);
- fetchQueue.push_back(instruction);
- assert(fetchQueue.size() <= fetchQueueSize);
+ fetchQueue[tid].push_back(instruction);
+ assert(fetchQueue[tid].size() <= fetchQueueSize);
DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",
- tid, fetchQueue.size(), fetchQueueSize);
+ tid, fetchQueue[tid].size(), fetchQueueSize);
-        //toDecode->insts[toDecode->size++] = instruction;
// Keep track of whether we can take an interrupt at this boundary
// Loop through instruction memory from the cache.
-    // Keep issuing while fetchWidth is available and branch is not
-    // predicted taken
+    // Keep issuing while fetchWidth is available, this thread's fetch
+    // queue has room, and no branch is predicted taken
- while (numInst < fetchWidth && fetchQueue.size() < fetchQueueSize
+ while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
&& !predictedBranch) {
// We need to process more memory if we aren't going to get a
// StaticInst from the rom, the current macroop, or what's already
break;
}
} while ((curMacroop || decoder[tid]->instReady()) &&
- numInst < fetchWidth && fetchQueue.size() < fetchQueueSize);
+ numInst < fetchWidth &&
+ fetchQueue[tid].size() < fetchQueueSize);
}
if (predictedBranch) {