cpu: Add SMT support to MinorCPU
author Mitch Hayenga <mitch.hayenga@arm.com>
Thu, 21 Jul 2016 16:19:16 +0000 (17:19 +0100)
committer Mitch Hayenga <mitch.hayenga@arm.com>
Thu, 21 Jul 2016 16:19:16 +0000 (17:19 +0100)
This patch adds SMT support to the MinorCPU.  Currently the
RoundRobin and Random thread scheduling policies are supported.

Change-Id: I91faf39ff881af5918cca05051829fc6261f20e3

21 files changed:
src/cpu/minor/MinorCPU.py
src/cpu/minor/cpu.cc
src/cpu/minor/cpu.hh
src/cpu/minor/decode.cc
src/cpu/minor/decode.hh
src/cpu/minor/dyn_inst.cc
src/cpu/minor/exec_context.hh
src/cpu/minor/execute.cc
src/cpu/minor/execute.hh
src/cpu/minor/fetch1.cc
src/cpu/minor/fetch1.hh
src/cpu/minor/fetch2.cc
src/cpu/minor/fetch2.hh
src/cpu/minor/lsq.cc
src/cpu/minor/lsq.hh
src/cpu/minor/pipe_data.cc
src/cpu/minor/pipe_data.hh
src/cpu/minor/pipeline.cc
src/cpu/minor/pipeline.hh
src/sim/pseudo_inst.cc
util/minorview/minor.pic

index 9ab7b0b39e00a453c4e75521c9f3eae1cc618185..2c80af175059e0184952492386f18d18c5493e49 100644 (file)
@@ -169,6 +169,8 @@ class MinorDefaultFUPool(MinorFUPool):
         MinorDefaultFloatSimdFU(), MinorDefaultMemFU(),
         MinorDefaultMiscFU()]
 
+class ThreadPolicy(Enum): vals = ['SingleThreaded', 'RoundRobin', 'Random']
+
 class MinorCPU(BaseCPU):
     type = 'MinorCPU'
     cxx_header = "cpu/minor/cpu.hh"
@@ -185,6 +187,8 @@ class MinorCPU(BaseCPU):
     def support_take_over(cls):
         return True
 
+    threadPolicy = Param.ThreadPolicy('RoundRobin',
+            "Thread scheduling policy")
     fetch1FetchLimit = Param.Unsigned(1,
         "Number of line fetches allowable in flight at once")
     fetch1LineSnapWidth = Param.Unsigned(0,
index 79807a2a765b78745517bed01bd3111d149f6346..016a60f47204d7a3acd1eaa85da9207b54b535a4 100644 (file)
 #include "debug/Quiesce.hh"
 
 MinorCPU::MinorCPU(MinorCPUParams *params) :
-    BaseCPU(params)
+    BaseCPU(params),
+    threadPolicy(params->threadPolicy)
 {
     /* This is only written for one thread at the moment */
     Minor::MinorThread *thread;
 
-    if (FullSystem) {
-        thread = new Minor::MinorThread(this, 0, params->system, params->itb,
-            params->dtb, params->isa[0]);
-    } else {
-        /* thread_id 0 */
-        thread = new Minor::MinorThread(this, 0, params->system,
-            params->workload[0], params->itb, params->dtb, params->isa[0]);
-    }
-
-    threads.push_back(thread);
+    for (ThreadID i = 0; i < numThreads; i++) {
+        if (FullSystem) {
+            thread = new Minor::MinorThread(this, i, params->system,
+                    params->itb, params->dtb, params->isa[i]);
+            thread->setStatus(ThreadContext::Halted);
+        } else {
+            thread = new Minor::MinorThread(this, i, params->system,
+                    params->workload[i], params->itb, params->dtb,
+                    params->isa[i]);
+        }
 
-    thread->setStatus(ThreadContext::Halted);
+        threads.push_back(thread);
+        ThreadContext *tc = thread->getTC();
+        threadContexts.push_back(tc);
+    }
 
-    ThreadContext *tc = thread->getTC();
 
     if (params->checker) {
         fatal("The Minor model doesn't support checking (yet)\n");
     }
 
-    threadContexts.push_back(tc);
-
     Minor::MinorDynInst::init();
 
     pipeline = new Minor::Pipeline(*this, *params);
@@ -137,9 +138,6 @@ MinorCPU::serializeThread(CheckpointOut &cp, ThreadID thread_id) const
 void
 MinorCPU::unserializeThread(CheckpointIn &cp, ThreadID thread_id)
 {
-    if (thread_id != 0)
-        fatal("Trying to load more than one thread into a MinorCPU\n");
-
     threads[thread_id]->unserialize(cp);
 }
 
@@ -170,11 +168,11 @@ void
 MinorCPU::wakeup(ThreadID tid)
 {
     DPRINTF(Drain, "[tid:%d] MinorCPU wakeup\n", tid);
+    assert(tid < numThreads);
 
-    if (threads[tid]->status() == ThreadContext::Suspended)
+    if (threads[tid]->status() == ThreadContext::Suspended) {
         threads[tid]->activate();
-
-    DPRINTF(Drain,"Suspended Processor awoke\n");
+    }
 }
 
 void
@@ -187,13 +185,10 @@ MinorCPU::startup()
     for (auto i = threads.begin(); i != threads.end(); i ++)
         (*i)->startup();
 
-    /* Workaround cases in SE mode where a thread is activated with an
-     * incorrect PC that is updated after the call to activate. This
-     * causes problems for Minor since it instantiates a virtual
-     * branch instruction when activateContext() is called which ends
-     * up pointing to an illegal address.  */
-    if (threads[0]->status() == ThreadContext::Active)
-        activateContext(0);
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        /* threads[tid]->startup() is already called in the loop above */
+        pipeline->wakeupFetch(tid);
+    }
 }
 
 DrainState
@@ -246,6 +241,7 @@ MinorCPU::drainResume()
 
     for (ThreadID tid = 0; tid < numThreads; tid++)
         wakeup(tid);
+
     pipeline->drainResume();
 }
 
@@ -278,7 +274,7 @@ MinorCPU::takeOverFrom(BaseCPU *old_cpu)
 void
 MinorCPU::activateContext(ThreadID thread_id)
 {
-    DPRINTF(MinorCPU, "ActivateContext thread: %d", thread_id);
+    DPRINTF(MinorCPU, "ActivateContext thread: %d\n", thread_id);
 
     /* Do some cycle accounting.  lastStopped is reset to stop the
      *  wakeup call on the pipeline from adding the quiesce period
@@ -289,7 +285,7 @@ MinorCPU::activateContext(ThreadID thread_id)
     /* Wake up the thread, wakeup the pipeline tick */
     threads[thread_id]->activate();
     wakeupOnEvent(Minor::Pipeline::CPUStageId);
-    pipeline->wakeupFetch();
+    pipeline->wakeupFetch(thread_id);
 
     BaseCPU::activateContext(thread_id);
 }
@@ -317,9 +313,6 @@ MinorCPU::wakeupOnEvent(unsigned int stage_id)
 MinorCPU *
 MinorCPUParams::create()
 {
-    numThreads = 1;
-    if (!FullSystem && workload.size() != 1)
-        panic("only one workload allowed");
     return new MinorCPU(this);
 }
 
index dad015e8933f2b29163373f54cca3ab518fda824..4e4762390034fde5cbc11b274d3386437b295030 100644 (file)
@@ -50,6 +50,7 @@
 #include "cpu/minor/stats.hh"
 #include "cpu/base.hh"
 #include "cpu/simple_thread.hh"
+#include "enums/ThreadPolicy.hh"
 #include "params/MinorCPU.hh"
 
 namespace Minor
@@ -109,6 +110,8 @@ class MinorCPU : public BaseCPU
 
     };
 
+    /** Thread Scheduling Policy (RoundRobin, Random, etc) */
+    Enums::ThreadPolicy threadPolicy;
   protected:
      /** Return a reference to the data port. */
     MasterPort &getDataPort() override;
@@ -162,6 +165,26 @@ class MinorCPU : public BaseCPU
     void activateContext(ThreadID thread_id) override;
     void suspendContext(ThreadID thread_id) override;
 
+    /** Thread scheduling utility functions */
+    std::vector<ThreadID> roundRobinPriority(ThreadID priority)
+    {
+        std::vector<ThreadID> prio_list;
+        for (ThreadID i = 1; i <= numThreads; i++) {
+            prio_list.push_back((priority + i) % numThreads);
+        }
+        return prio_list;
+    }
+
+    std::vector<ThreadID> randomPriority()
+    {
+        std::vector<ThreadID> prio_list;
+        for (ThreadID i = 0; i < numThreads; i++) {
+            prio_list.push_back(i);
+        }
+        std::random_shuffle(prio_list.begin(), prio_list.end());
+        return prio_list;
+    }
+
     /** Interface for stages to signal that they have become active after
      *  a callback or eventq event where the pipeline itself may have
      *  already been idled.  The stage argument should be from the
index 94eee2be31e1918f727d37e714ec99f37dda69ff..6243bca0119a9410ecf0df587cb09521a800f520 100644 (file)
@@ -49,7 +49,7 @@ Decode::Decode(const std::string &name,
     MinorCPUParams &params,
     Latch<ForwardInstData>::Output inp_,
     Latch<ForwardInstData>::Input out_,
-    Reservable &next_stage_input_buffer) :
+    std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
     Named(name),
     cpu(cpu_),
     inp(inp_),
@@ -57,11 +57,8 @@ Decode::Decode(const std::string &name,
     nextStageReserve(next_stage_input_buffer),
     outputWidth(params.executeInputWidth),
     processMoreThanOneInput(params.decodeCycleInput),
-    inputBuffer(name + ".inputBuffer", "insts", params.decodeInputBufferSize),
-    inputIndex(0),
-    inMacroop(false),
-    execSeqNum(InstId::firstExecSeqNum),
-    blocked(false)
+    decodeInfo(params.numThreads),
+    threadPriority(0)
 {
     if (outputWidth < 1)
         fatal("%s: executeInputWidth must be >= 1 (%d)\n", name, outputWidth);
@@ -70,29 +67,37 @@ Decode::Decode(const std::string &name,
         fatal("%s: decodeInputBufferSize must be >= 1 (%d)\n", name,
         params.decodeInputBufferSize);
     }
+
+    /* Per-thread input buffers */
+    for (ThreadID tid = 0; tid < params.numThreads; tid++) {
+        inputBuffer.push_back(
+            InputBuffer<ForwardInstData>(
+                name + ".inputBuffer" + std::to_string(tid), "insts",
+                params.decodeInputBufferSize));
+    }
 }
 
 const ForwardInstData *
-Decode::getInput()
+Decode::getInput(ThreadID tid)
 {
     /* Get insts from the inputBuffer to work with */
-    if (!inputBuffer.empty()) {
-        const ForwardInstData &head = inputBuffer.front();
+    if (!inputBuffer[tid].empty()) {
+        const ForwardInstData &head = inputBuffer[tid].front();
 
-        return (head.isBubble() ? NULL : &(inputBuffer.front()));
+        return (head.isBubble() ? NULL : &(inputBuffer[tid].front()));
     } else {
         return NULL;
     }
 }
 
 void
-Decode::popInput()
+Decode::popInput(ThreadID tid)
 {
-    if (!inputBuffer.empty())
-        inputBuffer.pop();
+    if (!inputBuffer[tid].empty())
+        inputBuffer[tid].pop();
 
-    inputIndex = 0;
-    inMacroop = false;
+    decodeInfo[tid].inputIndex = 0;
+    decodeInfo[tid].inMacroop = false;
 }
 
 #if TRACING_ON
@@ -117,32 +122,37 @@ dynInstAddTracing(MinorDynInstPtr inst, StaticInstPtr static_inst,
 void
 Decode::evaluate()
 {
-    inputBuffer.setTail(*inp.outputWire);
+    /* Push input onto appropriate input buffer */
+    if (!inp.outputWire->isBubble())
+        inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire);
+
     ForwardInstData &insts_out = *out.inputWire;
 
     assert(insts_out.isBubble());
 
-    blocked = false;
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
+        decodeInfo[tid].blocked = !nextStageReserve[tid].canReserve();
 
-    if (!nextStageReserve.canReserve()) {
-        blocked = true;
-    } else {
-        const ForwardInstData *insts_in = getInput();
+    ThreadID tid = getScheduledThread();
+
+    if (tid != InvalidThreadID) {
+        DecodeThreadInfo &decode_info = decodeInfo[tid];
+        const ForwardInstData *insts_in = getInput(tid);
 
         unsigned int output_index = 0;
 
         /* Pack instructions into the output while we can.  This may involve
          * using more than one input line */
         while (insts_in &&
-           inputIndex < insts_in->width() && /* Still more input */
+           decode_info.inputIndex < insts_in->width() && /* Still more input */
            output_index < outputWidth /* Still more output to fill */)
         {
-            MinorDynInstPtr inst = insts_in->insts[inputIndex];
+            MinorDynInstPtr inst = insts_in->insts[decode_info.inputIndex];
 
             if (inst->isBubble()) {
                 /* Skip */
-                inputIndex++;
-                inMacroop = false;
+                decode_info.inputIndex++;
+                decode_info.inMacroop = false;
             } else {
                 StaticInstPtr static_inst = inst->staticInst;
                 /* Static inst of a macro-op above the output_inst */
@@ -153,25 +163,26 @@ Decode::evaluate()
                     DPRINTF(Decode, "Fault being passed: %d\n",
                         inst->fault->name());
 
-                    inputIndex++;
-                    inMacroop = false;
+                    decode_info.inputIndex++;
+                    decode_info.inMacroop = false;
                 } else if (static_inst->isMacroop()) {
                     /* Generate a new micro-op */
                     StaticInstPtr static_micro_inst;
 
                     /* Set up PC for the next micro-op emitted */
-                    if (!inMacroop) {
-                        microopPC = inst->pc;
-                        inMacroop = true;
+                    if (!decode_info.inMacroop) {
+                        decode_info.microopPC = inst->pc;
+                        decode_info.inMacroop = true;
                     }
 
                     /* Get the micro-op static instruction from the
                      * static_inst. */
                     static_micro_inst =
-                        static_inst->fetchMicroop(microopPC.microPC());
+                        static_inst->fetchMicroop(
+                                decode_info.microopPC.microPC());
 
                     output_inst = new MinorDynInst(inst->id);
-                    output_inst->pc = microopPC;
+                    output_inst->pc = decode_info.microopPC;
                     output_inst->staticInst = static_micro_inst;
                     output_inst->fault = NoFault;
 
@@ -185,45 +196,46 @@ Decode::evaluate()
                     DPRINTF(Decode, "Microop decomposition inputIndex:"
                         " %d output_index: %d lastMicroop: %s microopPC:"
                         " %d.%d inst: %d\n",
-                        inputIndex, output_index,
+                        decode_info.inputIndex, output_index,
                         (static_micro_inst->isLastMicroop() ?
                             "true" : "false"),
-                        microopPC.instAddr(), microopPC.microPC(),
+                        decode_info.microopPC.instAddr(),
+                        decode_info.microopPC.microPC(),
                         *output_inst);
 
                     /* Acknowledge that the static_inst isn't mine, it's my
                      * parent macro-op's */
                     parent_static_inst = static_inst;
 
-                    static_micro_inst->advancePC(microopPC);
+                    static_micro_inst->advancePC(decode_info.microopPC);
 
                     /* Step input if this is the last micro-op */
                     if (static_micro_inst->isLastMicroop()) {
-                        inputIndex++;
-                        inMacroop = false;
+                        decode_info.inputIndex++;
+                        decode_info.inMacroop = false;
                     }
                 } else {
                     /* Doesn't need decomposing, pass on instruction */
                     DPRINTF(Decode, "Passing on inst: %s inputIndex:"
                         " %d output_index: %d\n",
-                        *output_inst, inputIndex, output_index);
+                        *output_inst, decode_info.inputIndex, output_index);
 
                     parent_static_inst = static_inst;
 
                     /* Step input */
-                    inputIndex++;
-                    inMacroop = false;
+                    decode_info.inputIndex++;
+                    decode_info.inMacroop = false;
                 }
 
                 /* Set execSeqNum of output_inst */
-                output_inst->id.execSeqNum = execSeqNum;
+                output_inst->id.execSeqNum = decode_info.execSeqNum;
                 /* Add tracing */
 #if TRACING_ON
                 dynInstAddTracing(output_inst, parent_static_inst, cpu);
 #endif
 
                 /* Step to next sequence number */
-                execSeqNum++;
+                decode_info.execSeqNum++;
 
                 /* Correctly size the output before writing */
                 if (output_index == 0) insts_out.resize(outputWidth);
@@ -233,17 +245,17 @@ Decode::evaluate()
             }
 
             /* Have we finished with the input? */
-            if (inputIndex == insts_in->width()) {
+            if (decode_info.inputIndex == insts_in->width()) {
                 /* If we have just been producing micro-ops, we *must* have
                  * got to the end of that for inputIndex to be pushed past
                  * insts_in->width() */
-                assert(!inMacroop);
-                popInput();
+                assert(!decode_info.inMacroop);
+                popInput(tid);
                 insts_in = NULL;
 
                 if (processMoreThanOneInput) {
                     DPRINTF(Decode, "Wrapping\n");
-                    insts_in = getInput();
+                    insts_in = getInput(tid);
                 }
             }
         }
@@ -261,22 +273,65 @@ Decode::evaluate()
     if (!insts_out.isBubble()) {
         /* Note activity of following buffer */
         cpu.activityRecorder->activity();
-        nextStageReserve.reserve();
+        insts_out.threadId = tid;
+        nextStageReserve[tid].reserve();
     }
 
     /* If we still have input to process and somewhere to put it,
      *  mark stage as active */
-    if (getInput() && nextStageReserve.canReserve())
-        cpu.activityRecorder->activateStage(Pipeline::DecodeStageId);
+    for (ThreadID i = 0; i < cpu.numThreads; i++)
+    {
+        if (getInput(i) && nextStageReserve[i].canReserve()) {
+            cpu.activityRecorder->activateStage(Pipeline::DecodeStageId);
+            break;
+        }
+    }
 
     /* Make sure the input (if any left) is pushed */
-    inputBuffer.pushTail();
+    if (!inp.outputWire->isBubble())
+        inputBuffer[inp.outputWire->threadId].pushTail();
+}
+
+inline ThreadID
+Decode::getScheduledThread()
+{
+    /* Select thread via policy. */
+    std::vector<ThreadID> priority_list;
+
+    switch (cpu.threadPolicy) {
+      case Enums::SingleThreaded:
+        priority_list.push_back(0);
+        break;
+      case Enums::RoundRobin:
+        priority_list = cpu.roundRobinPriority(threadPriority);
+        break;
+      case Enums::Random:
+        priority_list = cpu.randomPriority();
+        break;
+      default:
+        panic("Unknown thread policy");
+    }
+
+    for (auto tid : priority_list) {
+        if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+            getInput(tid) && !decodeInfo[tid].blocked) {
+            threadPriority = tid;
+            return tid;
+        }
+    }
+
+    return InvalidThreadID;
 }
 
 bool
 Decode::isDrained()
 {
-    return inputBuffer.empty() && (*inp.outputWire).isBubble();
+    for (const auto &buffer : inputBuffer) {
+        if (!buffer.empty())
+            return false;
+    }
+
+    return (*inp.outputWire).isBubble();
 }
 
 void
@@ -284,13 +339,13 @@ Decode::minorTrace() const
 {
     std::ostringstream data;
 
-    if (blocked)
+    if (decodeInfo[0].blocked)
         data << 'B';
     else
         (*out.inputWire).reportData(data);
 
     MINORTRACE("insts=%s\n", data.str());
-    inputBuffer.minorTrace();
+    inputBuffer[0].minorTrace();
 }
 
 }
index fcc18fd44f5866ac394d79f7246a31b9db0bb0bb..a4d29a59d96a9bac29fee4fd73ab36bd19b51ee3 100644 (file)
@@ -71,7 +71,7 @@ class Decode : public Named
     Latch<ForwardInstData>::Input out;
 
     /** Interface to reserve space in the next stage */
-    Reservable &nextStageReserve;
+    std::vector<InputBuffer<ForwardInstData>> &nextStageReserve;
 
     /** Width of output of this stage/input of next in instructions */
     unsigned int outputWidth;
@@ -82,43 +82,68 @@ class Decode : public Named
 
   public:
     /* Public for Pipeline to be able to pass it to Fetch2 */
-    InputBuffer<ForwardInstData> inputBuffer;
+    std::vector<InputBuffer<ForwardInstData>> inputBuffer;
 
   protected:
     /** Data members after this line are cycle-to-cycle state */
 
-    /** Index into the inputBuffer's head marking the start of unhandled
-     *  instructions */
-    unsigned int inputIndex;
+    struct DecodeThreadInfo {
 
-    /** True when we're in the process of decomposing a micro-op and
-     *  microopPC will be valid.  This is only the case when there isn't
-     *  sufficient space in Executes input buffer to take the whole of a
-     *  decomposed instruction and some of that instructions micro-ops must
-     *  be generated in a later cycle */
-    bool inMacroop;
-    TheISA::PCState microopPC;
+        /** Default Constructor */
+        DecodeThreadInfo() :
+            inputIndex(0),
+            inMacroop(false),
+            execSeqNum(InstId::firstExecSeqNum),
+            blocked(false)
+        { }
 
-    /** Source of execSeqNums to number instructions. */
-    InstSeqNum execSeqNum;
+        DecodeThreadInfo(const DecodeThreadInfo& other) :
+            inputIndex(other.inputIndex),
+            inMacroop(other.inMacroop),
+            execSeqNum(other.execSeqNum),
+            blocked(other.blocked)
+        { }
 
-    /** Blocked indication for report */
-    bool blocked;
+
+        /** Index into the inputBuffer's head marking the start of unhandled
+         *  instructions */
+        unsigned int inputIndex;
+
+        /** True when we're in the process of decomposing a micro-op and
+         *  microopPC will be valid.  This is only the case when there isn't
+         *  sufficient space in Executes input buffer to take the whole of a
+         *  decomposed instruction and some of that instructions micro-ops must
+         *  be generated in a later cycle */
+        bool inMacroop;
+        TheISA::PCState microopPC;
+
+        /** Source of execSeqNums to number instructions. */
+        InstSeqNum execSeqNum;
+
+        /** Blocked indication for report */
+        bool blocked;
+    };
+
+    std::vector<DecodeThreadInfo> decodeInfo;
+    ThreadID threadPriority;
 
   protected:
     /** Get a piece of data to work on, or 0 if there is no data. */
-    const ForwardInstData *getInput();
+    const ForwardInstData *getInput(ThreadID tid);
 
     /** Pop an element off the input buffer, if there are any */
-    void popInput();
+    void popInput(ThreadID tid);
 
+    /** Use the current threading policy to determine the next thread to
+     *  decode from. */
+    ThreadID getScheduledThread();
   public:
     Decode(const std::string &name,
         MinorCPU &cpu_,
         MinorCPUParams &params,
         Latch<ForwardInstData>::Output inp_,
         Latch<ForwardInstData>::Input out_,
-        Reservable &next_stage_input_buffer);
+        std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer);
 
   public:
     /** Pass on input/buffer data to the output if you can */
index ab08e6b4aa69fc60ba93314d0e02af9f4e434084..5d54f6913dc581512da25def8202044def392237 100644 (file)
 namespace Minor
 {
 
+const InstSeqNum InstId::firstStreamSeqNum;
+const InstSeqNum InstId::firstPredictionSeqNum;
+const InstSeqNum InstId::firstLineSeqNum;
+const InstSeqNum InstId::firstFetchSeqNum;
+const InstSeqNum InstId::firstExecSeqNum;
+
 std::ostream &
 operator <<(std::ostream &os, const InstId &id)
 {
index 092ad5a2dcba82ba8d919980c85fb345224149a6..a5d646b6c497d684298470cecd9441feacd19798 100644 (file)
@@ -342,12 +342,17 @@ class ExecContext : public ::ExecContext
 
   public:
     // monitor/mwait funtions
-    void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
-    bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
+    void armMonitor(Addr address)
+    { getCpuPtr()->armMonitor(inst->id.threadId, address); }
+
+    bool mwait(PacketPtr pkt)
+    { return getCpuPtr()->mwait(inst->id.threadId, pkt); }
+
     void mwaitAtomic(ThreadContext *tc)
-    { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
+    { return getCpuPtr()->mwaitAtomic(inst->id.threadId, tc, thread.dtb); }
+
     AddressMonitor *getAddrMonitor()
-    { return getCpuPtr()->getCpuAddrMonitor(0); }
+    { return getCpuPtr()->getCpuAddrMonitor(inst->id.threadId); }
 };
 
 }
index 4298e1dcc69a2c2bf43e41f3d161dc786d993d34..b13e0c0203c983e41685b4615090f22470a56a1c 100644 (file)
@@ -86,15 +86,10 @@ Execute::Execute(const std::string &name_,
         params.executeLSQTransfersQueueSize,
         params.executeLSQStoreBufferSize,
         params.executeLSQMaxStoreBufferStoresPerCycle),
-    scoreboard(name_ + ".scoreboard"),
-    inputBuffer(name_ + ".inputBuffer", "insts",
-        params.executeInputBufferSize),
-    inputIndex(0),
-    lastCommitWasEndOfMacroop(true),
-    instsBeingCommitted(params.executeCommitLimit),
-    streamSeqNum(InstId::firstStreamSeqNum),
-    lastPredictionSeqNum(InstId::firstPredictionSeqNum),
-    drainState(NotDraining)
+    executeInfo(params.numThreads, ExecuteThreadInfo(params.executeCommitLimit)),
+    interruptPriority(0),
+    issuePriority(0),
+    commitPriority(0)
 {
     if (commitLimit < 1) {
         fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
@@ -170,35 +165,50 @@ Execute::Execute(const std::string &name_,
         }
     }
 
-    inFlightInsts = new Queue<QueuedInst,
-        ReportTraitsAdaptor<QueuedInst> >(
-        name_ + ".inFlightInsts", "insts", total_slots);
+    /* Per-thread structures */
+    for (ThreadID tid = 0; tid < params.numThreads; tid++) {
+        std::string tid_str = std::to_string(tid);
 
-    inFUMemInsts = new Queue<QueuedInst,
-        ReportTraitsAdaptor<QueuedInst> >(
-        name_ + ".inFUMemInsts", "insts", total_slots);
+        /* Input Buffers */
+        inputBuffer.push_back(
+            InputBuffer<ForwardInstData>(
+                name_ + ".inputBuffer" + tid_str, "insts",
+                params.executeInputBufferSize));
+
+        /* Scoreboards */
+        scoreboard.push_back(Scoreboard(name_ + ".scoreboard" + tid_str));
+
+        /* In-flight instruction records */
+        executeInfo[tid].inFlightInsts = new Queue<QueuedInst,
+            ReportTraitsAdaptor<QueuedInst> >(
+            name_ + ".inFlightInsts" + tid_str, "insts", total_slots);
+
+        executeInfo[tid].inFUMemInsts = new Queue<QueuedInst,
+            ReportTraitsAdaptor<QueuedInst> >(
+            name_ + ".inFUMemInsts" + tid_str, "insts", total_slots);
+    }
 }
 
 const ForwardInstData *
-Execute::getInput()
+Execute::getInput(ThreadID tid)
 {
     /* Get a line from the inputBuffer to work with */
-    if (!inputBuffer.empty()) {
-        const ForwardInstData &head = inputBuffer.front();
+    if (!inputBuffer[tid].empty()) {
+        const ForwardInstData &head = inputBuffer[tid].front();
 
-        return (head.isBubble() ? NULL : &(inputBuffer.front()));
+        return (head.isBubble() ? NULL : &(inputBuffer[tid].front()));
     } else {
         return NULL;
     }
 }
 
 void
-Execute::popInput()
+Execute::popInput(ThreadID tid)
 {
-    if (!inputBuffer.empty())
-        inputBuffer.pop();
+    if (!inputBuffer[tid].empty())
+        inputBuffer[tid].pop();
 
-    inputIndex = 0;
+    executeInfo[tid].inputIndex = 0;
 }
 
 void
@@ -276,11 +286,12 @@ Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch)
         reason = BranchData::NoBranch;
     }
 
-    updateBranchData(reason, inst, target, branch);
+    updateBranchData(inst->id.threadId, reason, inst, target, branch);
 }
 
 void
 Execute::updateBranchData(
+    ThreadID tid,
     BranchData::Reason reason,
     MinorDynInstPtr inst, const TheISA::PCState &target,
     BranchData &branch)
@@ -288,14 +299,15 @@ Execute::updateBranchData(
     if (reason != BranchData::NoBranch) {
         /* Bump up the stream sequence number on a real branch*/
         if (BranchData::isStreamChange(reason))
-            streamSeqNum++;
+            executeInfo[tid].streamSeqNum++;
 
         /* Branches (even mis-predictions) don't change the predictionSeqNum,
          *  just the streamSeqNum */
-        branch = BranchData(reason, streamSeqNum,
+        branch = BranchData(reason, tid,
+            executeInfo[tid].streamSeqNum,
             /* Maintaining predictionSeqNum if there's no inst is just a
              * courtesy and looks better on minorview */
-            (inst->isBubble() ? lastPredictionSeqNum
+            (inst->isBubble() ? executeInfo[tid].lastPredictionSeqNum
                 : inst->id.predictionSeqNum),
             target, inst);
 
@@ -419,8 +431,9 @@ Execute::takeInterrupt(ThreadID thread_id, BranchData &branch)
 
         /* Assume that an interrupt *must* cause a branch.  Assert this? */
 
-        updateBranchData(BranchData::Interrupt, MinorDynInst::bubble(),
-            cpu.getContext(thread_id)->pcState(), branch);
+        updateBranchData(thread_id, BranchData::Interrupt,
+            MinorDynInst::bubble(), cpu.getContext(thread_id)->pcState(),
+            branch);
     }
 
     return interrupt != NoFault;
@@ -506,9 +519,10 @@ cyclicIndexDec(unsigned int index, unsigned int cycle_size)
 }
 
 unsigned int
-Execute::issue(bool only_issue_microops)
+Execute::issue(ThreadID thread_id)
 {
-    const ForwardInstData *insts_in = getInput();
+    const ForwardInstData *insts_in = getInput(thread_id);
+    ExecuteThreadInfo &thread = executeInfo[thread_id];
 
     /* Early termination if we have no instructions */
     if (!insts_in)
@@ -534,8 +548,7 @@ Execute::issue(bool only_issue_microops)
     unsigned num_insts_discarded = 0;
 
     do {
-        MinorDynInstPtr inst = insts_in->insts[inputIndex];
-        ThreadID thread_id = inst->id.threadId;
+        MinorDynInstPtr inst = insts_in->insts[thread.inputIndex];
         Fault fault = inst->fault;
         bool discarded = false;
         bool issued_mem_ref = false;
@@ -550,21 +563,12 @@ Execute::issue(bool only_issue_microops)
                 " thread\n", *inst);
 
             issued = false;
-        } else if (inst->id.streamSeqNum != streamSeqNum) {
+        } else if (inst->id.streamSeqNum != thread.streamSeqNum) {
             DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
                 " state was unexpected, expected: %d\n",
-                *inst, streamSeqNum);
+                *inst, thread.streamSeqNum);
             issued = true;
             discarded = true;
-        } else if (fault == NoFault && only_issue_microops &&
-            /* Is this anything other than a non-first microop */
-            (!inst->staticInst->isMicroop() ||
-                !inst->staticInst->isFirstMicroop()))
-        {
-            DPRINTF(MinorExecute, "Not issuing new non-microop inst: %s\n",
-                *inst);
-
-            issued = false;
         } else {
             /* Try and issue an instruction into an FU, assume we didn't and
              * fix that in the loop */
@@ -598,9 +602,10 @@ Execute::issue(bool only_issue_microops)
 
                     /* Mark the destinations for this instruction as
                      *  busy */
-                    scoreboard.markupInstDests(inst, cpu.curCycle() +
+                    scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
                         Cycles(0), cpu.getContext(thread_id), false);
 
+                    DPRINTF(MinorExecute, "Issuing %s to %d\n", inst->id, noCostFUIndex);
                     inst->fuIndex = noCostFUIndex;
                     inst->extraCommitDelay = Cycles(0);
                     inst->extraCommitDelayExpr = NULL;
@@ -608,7 +613,7 @@ Execute::issue(bool only_issue_microops)
                     /* Push the instruction onto the inFlight queue so
                      *  it can be committed in order */
                     QueuedInst fu_inst(inst);
-                    inFlightInsts->push(fu_inst);
+                    thread.inFlightInsts->push(fu_inst);
 
                     issued = true;
 
@@ -644,8 +649,8 @@ Execute::issue(bool only_issue_microops)
                         DPRINTF(MinorExecute, "Can't issue inst: %s as extra"
                             " decoding is suppressing it\n",
                             *inst);
-                    } else if (!scoreboard.canInstIssue(inst, src_latencies,
-                        cant_forward_from_fu_indices,
+                    } else if (!scoreboard[thread_id].canInstIssue(inst,
+                        src_latencies, cant_forward_from_fu_indices,
                         cpu.curCycle(), cpu.getContext(thread_id)))
                     {
                         DPRINTF(MinorExecute, "Can't issue inst: %s yet\n",
@@ -687,20 +692,20 @@ Execute::issue(bool only_issue_microops)
                              *  early */
                             if (allowEarlyMemIssue) {
                                 inst->instToWaitFor =
-                                    scoreboard.execSeqNumToWaitFor(inst,
+                                    scoreboard[thread_id].execSeqNumToWaitFor(inst,
                                         cpu.getContext(thread_id));
 
-                                if (lsq.getLastMemBarrier() >
+                                if (lsq.getLastMemBarrier(thread_id) >
                                     inst->instToWaitFor)
                                 {
                                     DPRINTF(MinorExecute, "A barrier will"
                                         " cause a delay in mem ref issue of"
                                         " inst: %s until after inst"
                                         " %d(exec)\n", *inst,
-                                        lsq.getLastMemBarrier());
+                                        lsq.getLastMemBarrier(thread_id));
 
                                     inst->instToWaitFor =
-                                        lsq.getLastMemBarrier();
+                                        lsq.getLastMemBarrier(thread_id);
                                 } else {
                                     DPRINTF(MinorExecute, "Memory ref inst:"
                                         " %s must wait for inst %d(exec)"
@@ -714,7 +719,7 @@ Execute::issue(bool only_issue_microops)
                              *  queue to ensure in-order issue to the LSQ */
                             DPRINTF(MinorExecute, "Pushing mem inst: %s\n",
                                 *inst);
-                            inFUMemInsts->push(fu_inst);
+                            thread.inFUMemInsts->push(fu_inst);
                         }
 
                         /* Issue to FU */
@@ -725,7 +730,7 @@ Execute::issue(bool only_issue_microops)
 
                         /* Mark the destinations for this instruction as
                          *  busy */
-                        scoreboard.markupInstDests(inst, cpu.curCycle() +
+                        scoreboard[thread_id].markupInstDests(inst, cpu.curCycle() +
                             fu->description.opLat +
                             extra_dest_retire_lat +
                             extra_assumed_lat,
@@ -734,7 +739,7 @@ Execute::issue(bool only_issue_microops)
 
                         /* Push the instruction onto the inFlight queue so
                          *  it can be committed in order */
-                        inFlightInsts->push(fu_inst);
+                        thread.inFlightInsts->push(fu_inst);
 
                         issued = true;
                     }
@@ -777,24 +782,24 @@ Execute::issue(bool only_issue_microops)
                     DPRINTF(MinorExecute, "Reached inst issue limit\n");
             }
 
-            inputIndex++;
+            thread.inputIndex++;
             DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n",
-                inputIndex);
+                thread.inputIndex);
         }
 
         /* Got to the end of a line */
-        if (inputIndex == insts_in->width()) {
-            popInput();
+        if (thread.inputIndex == insts_in->width()) {
+            popInput(thread_id);
             /* Set insts_in to null to force us to leave the surrounding
              *  loop */
             insts_in = NULL;
 
             if (processMoreThanOneInput) {
                 DPRINTF(MinorExecute, "Wrapping\n");
-                insts_in = getInput();
+                insts_in = getInput(thread_id);
             }
         }
-    } while (insts_in && inputIndex < insts_in->width() &&
+    } while (insts_in && thread.inputIndex < insts_in->width() &&
         /* We still have instructions */
         fu_index != numFuncUnits && /* Not visited all FUs */
         issued && /* We've not yet failed to issue an instruction */
@@ -805,9 +810,9 @@ Execute::issue(bool only_issue_microops)
 }
 
 bool
-Execute::tryPCEvents()
+Execute::tryPCEvents(ThreadID thread_id)
 {
-    ThreadContext *thread = cpu.getContext(0);
+    ThreadContext *thread = cpu.getContext(thread_id);
     unsigned int num_pc_event_checks = 0;
 
     /* Handle PC events on instructions */
@@ -934,6 +939,11 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
             " there isn't space in the store buffer\n", *inst);
 
         completed_inst = false;
+    } else if (inst->isInst() && inst->staticInst->isQuiesce()
+            && !branch.isBubble()){
+        /* This instruction can suspend; it needs to be able to communicate
+         * backwards, so no other branches may evaluate this cycle */
+        completed_inst = false;
     } else {
         ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);
 
@@ -962,7 +972,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
         /* Keep a copy of this instruction's predictionSeqNum just in case
          * we need to issue a branch without an instruction (such as an
          * interrupt) */
-        lastPredictionSeqNum = inst->id.predictionSeqNum;
+        executeInfo[thread_id].lastPredictionSeqNum = inst->id.predictionSeqNum;
 
         /* Check to see if this instruction suspended the current thread. */
         if (!inst->isFault() &&
@@ -971,17 +981,17 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
             !isInterrupted(thread_id)) /* Don't suspend if we have
                 interrupts */
         {
-            TheISA::PCState resume_pc = cpu.getContext(0)->pcState();
+            TheISA::PCState resume_pc = cpu.getContext(thread_id)->pcState();
 
             assert(resume_pc.microPC() == 0);
 
             DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
-                " inst: %s\n", inst->id.threadId, *inst);
+                " inst: %s\n", thread_id, *inst);
 
             cpu.stats.numFetchSuspends++;
 
-            updateBranchData(BranchData::SuspendThread, inst, resume_pc,
-                branch);
+            updateBranchData(thread_id, BranchData::SuspendThread, inst,
+                resume_pc, branch);
         }
     }
 
@@ -989,10 +999,12 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
 }
 
 void
-Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
+Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
+    BranchData &branch)
 {
     Fault fault = NoFault;
     Cycles now = cpu.curCycle();
+    ExecuteThreadInfo &ex_info = executeInfo[thread_id];
 
     /**
      * Try and execute as many instructions from the end of FU pipelines as
@@ -1030,13 +1042,13 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
      *  memCommitLimit */
     unsigned int num_mem_refs_committed = 0;
 
-    if (only_commit_microops && !inFlightInsts->empty()) {
+    if (only_commit_microops && !ex_info.inFlightInsts->empty()) {
         DPRINTF(MinorInterrupt, "Only commit microops %s %d\n",
-            *(inFlightInsts->front().inst),
-            lastCommitWasEndOfMacroop);
+            *(ex_info.inFlightInsts->front().inst),
+            ex_info.lastCommitWasEndOfMacroop);
     }
 
-    while (!inFlightInsts->empty() && /* Some more instructions to process */
+    while (!ex_info.inFlightInsts->empty() && /* Some more instructions to process */
         !branch.isStreamChange() && /* No real branch */
         fault == NoFault && /* No faults */
         completed_inst && /* Still finding instructions to execute */
@@ -1046,10 +1058,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
         if (only_commit_microops) {
             DPRINTF(MinorInterrupt, "Committing tail of insts before"
                 " interrupt: %s\n",
-                *(inFlightInsts->front().inst));
+                *(ex_info.inFlightInsts->front().inst));
         }
 
-        QueuedInst *head_inflight_inst = &(inFlightInsts->front());
+        QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());
 
         InstSeqNum head_exec_seq_num =
             head_inflight_inst->inst->id.execSeqNum;
@@ -1071,8 +1083,8 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
         /* If we're just completing a macroop before an interrupt or drain,
          *  can we stil commit another microop (rather than a memory response)
          *  without crosing into the next full instruction? */
-        bool can_commit_insts = !inFlightInsts->empty() &&
-            !(only_commit_microops && lastCommitWasEndOfMacroop);
+        bool can_commit_insts = !ex_info.inFlightInsts->empty() &&
+            !(only_commit_microops && ex_info.lastCommitWasEndOfMacroop);
 
         /* Can we find a mem response for this inst */
         LSQ::LSQRequestPtr mem_response =
@@ -1082,18 +1094,18 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
             can_commit_insts);
 
         /* Test for PC events after every instruction */
-        if (isInbetweenInsts() && tryPCEvents()) {
-            ThreadContext *thread = cpu.getContext(0);
+        if (isInbetweenInsts(thread_id) && tryPCEvents(thread_id)) {
+            ThreadContext *thread = cpu.getContext(thread_id);
 
             /* Branch as there was a change in PC */
-            updateBranchData(BranchData::UnpredictedBranch,
+            updateBranchData(thread_id, BranchData::UnpredictedBranch,
                 MinorDynInst::bubble(), thread->pcState(), branch);
         } else if (mem_response &&
             num_mem_refs_committed < memoryCommitLimit)
         {
             /* Try to commit from the memory responses next */
-            discard_inst = inst->id.streamSeqNum != streamSeqNum ||
-                discard;
+            discard_inst = inst->id.streamSeqNum !=
+                           ex_info.streamSeqNum || discard;
 
             DPRINTF(MinorExecute, "Trying to commit mem response: %s\n",
                 *inst);
@@ -1102,7 +1114,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
             if (discard_inst) {
                 DPRINTF(MinorExecute, "Discarding mem inst: %s as its"
                     " stream state was unexpected, expected: %d\n",
-                    *inst, streamSeqNum);
+                    *inst, ex_info.streamSeqNum);
 
                 lsq.popResponse(mem_response);
             } else {
@@ -1128,11 +1140,11 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
              *  For any other case, leave it to the normal instruction
              *  issue below to handle them.
              */
-            if (!inFUMemInsts->empty() && lsq.canRequest()) {
+            if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
                 DPRINTF(MinorExecute, "Trying to commit from mem FUs\n");
 
                 const MinorDynInstPtr head_mem_ref_inst =
-                    inFUMemInsts->front().inst;
+                    ex_info.inFUMemInsts->front().inst;
                 FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
                 const MinorDynInstPtr &fu_inst = fu->front().inst;
 
@@ -1141,7 +1153,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
                 if (!fu_inst->isBubble() &&
                     !fu_inst->inLSQ &&
                     fu_inst->canEarlyIssue &&
-                    streamSeqNum == fu_inst->id.streamSeqNum &&
+                    ex_info.streamSeqNum == fu_inst->id.streamSeqNum &&
                     head_exec_seq_num > fu_inst->instToWaitFor)
                 {
                     DPRINTF(MinorExecute, "Issuing mem ref early"
@@ -1184,7 +1196,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
                      * actually at the end of its pipeline
                      * Future instruction: handled above and only for
                      * mem refs on their way to the LSQ */
-                } else /* if (fu_inst_seq_num == head_exec_seq_num) */ {
+                } else if (fu_inst.inst->id == inst->id)  {
                     /* All instructions can be committed if they have the
                      *  right execSeqNum and there are no in-flight
                      *  mem insts before us */
@@ -1194,8 +1206,8 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
             }
 
             if (try_to_commit) {
-                discard_inst = inst->id.streamSeqNum != streamSeqNum ||
-                    discard;
+                discard_inst = inst->id.streamSeqNum !=
+                    ex_info.streamSeqNum || discard;
 
                 /* Is this instruction discardable as its streamSeqNum
                  *  doesn't match? */
@@ -1209,8 +1221,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
                         DPRINTF(MinorExecute, "Evaluating expression for"
                             " extra commit delay inst: %s\n", *inst);
 
-                        ThreadContext *thread =
-                            cpu.getContext(inst->id.threadId);
+                        ThreadContext *thread = cpu.getContext(thread_id);
 
                         TimingExprEvalContext context(inst->staticInst,
                             thread, NULL);
@@ -1241,9 +1252,9 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
                     /* @todo Think about making lastMemBarrier be
                      *  MAX_UINT_64 to avoid using 0 as a marker value */
                     if (!inst->isFault() && inst->isMemRef() &&
-                        lsq.getLastMemBarrier() <
+                        lsq.getLastMemBarrier(thread_id) <
                             inst->id.execSeqNum &&
-                        lsq.getLastMemBarrier() != 0)
+                        lsq.getLastMemBarrier(thread_id) != 0)
                     {
                         DPRINTF(MinorExecute, "Not committing inst: %s yet"
                             " as there are incomplete barriers in flight\n",
@@ -1269,8 +1280,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
                      *  instruction wasn't the inFlightInsts head
                      *  but had already been committed, it would have
                      *  unstalled the pipeline before here */
-                    if (inst->fuIndex != noCostFUIndex)
+                    if (inst->fuIndex != noCostFUIndex) {
+                        DPRINTF(MinorExecute, "Unstalling %d for inst %s\n", inst->fuIndex, inst->id);
                         funcUnits[inst->fuIndex]->stalled = false;
+                    }
                 }
             }
         } else {
@@ -1286,7 +1299,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
         if (discard_inst) {
             DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
                 " state was unexpected, expected: %d\n",
-                *inst, streamSeqNum);
+                *inst, ex_info.streamSeqNum);
 
             if (fault == NoFault)
                 cpu.stats.numDiscardedOps++;
@@ -1303,10 +1316,10 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
         if (completed_inst && inst->isMemRef()) {
             /* The MemRef could have been discarded from the FU or the memory
              *  queue, so just check an FU instruction */
-            if (!inFUMemInsts->empty() &&
-                inFUMemInsts->front().inst == inst)
+            if (!ex_info.inFUMemInsts->empty() &&
+                ex_info.inFUMemInsts->front().inst == inst)
             {
-                inFUMemInsts->pop();
+                ex_info.inFUMemInsts->pop();
             }
         }
 
@@ -1315,16 +1328,16 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
             DPRINTF(MinorExecute, "Completed inst: %s\n", *inst);
 
             /* Got to the end of a full instruction? */
-            lastCommitWasEndOfMacroop = inst->isFault() ||
+            ex_info.lastCommitWasEndOfMacroop = inst->isFault() ||
                 inst->isLastOpInInst();
 
             /* lastPredictionSeqNum is kept as a convenience to prevent its
              *  value from changing too much on the minorview display */
-            lastPredictionSeqNum = inst->id.predictionSeqNum;
+            ex_info.lastPredictionSeqNum = inst->id.predictionSeqNum;
 
             /* Finished with the inst, remove it from the inst queue and
              *  clear its dependencies */
-            inFlightInsts->pop();
+            ex_info.inFlightInsts->pop();
 
             /* Complete barriers in the LSQ/move to store buffer */
             if (inst->isInst() && inst->staticInst->isMemBarrier()) {
@@ -1333,7 +1346,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
                 lsq.completeMemBarrierInst(inst, committed_inst);
             }
 
-            scoreboard.clearInstDests(inst, inst->isMemRef());
+            scoreboard[thread_id].clearInstDests(inst, inst->isMemRef());
         }
 
         /* Handle per-cycle instruction counting */
@@ -1343,7 +1356,7 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
             /* Don't show no cost instructions as having taken a commit
              *  slot */
             if (DTRACE(MinorTrace) && !is_no_cost_inst)
-                instsBeingCommitted.insts[num_insts_committed] = inst;
+                ex_info.instsBeingCommitted.insts[num_insts_committed] = inst;
 
             if (!is_no_cost_inst)
                 num_insts_committed++;
@@ -1369,124 +1382,112 @@ Execute::commit(bool only_commit_microops, bool discard, BranchData &branch)
 }
 
 bool
-Execute::isInbetweenInsts() const
+Execute::isInbetweenInsts(ThreadID thread_id) const
 {
-    return lastCommitWasEndOfMacroop &&
+    return executeInfo[thread_id].lastCommitWasEndOfMacroop &&
         !lsq.accessesInFlight();
 }
 
 void
 Execute::evaluate()
 {
-    inputBuffer.setTail(*inp.outputWire);
+    if (!inp.outputWire->isBubble())
+        inputBuffer[inp.outputWire->threadId].setTail(*inp.outputWire);
+
     BranchData &branch = *out.inputWire;
 
-    const ForwardInstData *insts_in = getInput();
+    unsigned int num_issued = 0;
 
     /* Do all the cycle-wise activities for dcachePort here to potentially
      *  free up input spaces in the LSQ's requests queue */
     lsq.step();
 
-    /* Has an interrupt been signalled?  This may not be acted on
-     *  straighaway so this is different from took_interrupt below */
+    /* Check interrupts first.  Will halt commit if interrupt found */
     bool interrupted = false;
-    /* If there was an interrupt signalled, was it acted on now? */
-    bool took_interrupt = false;
-
-    if (cpu.getInterruptController(0)) {
-        /* This is here because it seems that after drainResume the
-         * interrupt controller isn't always set */
-        interrupted = drainState == NotDraining && isInterrupted(0);
-    } else {
-        DPRINTF(MinorInterrupt, "No interrupt controller\n");
-    }
+    ThreadID interrupt_tid = checkInterrupts(branch, interrupted);
 
-    unsigned int num_issued = 0;
-
-    if (DTRACE(MinorTrace)) {
-        /* Empty the instsBeingCommitted for MinorTrace */
-        instsBeingCommitted.bubbleFill();
-    }
-
-    /* THREAD threadId on isInterrupted */
-    /* Act on interrupts */
-    if (interrupted && isInbetweenInsts()) {
-        took_interrupt = takeInterrupt(0, branch);
-        /* Clear interrupted if no interrupt was actually waiting */
-        interrupted = took_interrupt;
-    }
-
-    if (took_interrupt) {
-        /* Do no commit/issue this cycle */
+    if (interrupt_tid != InvalidThreadID) {
+        /* Signalling an interrupt this cycle, not issuing/committing from
+         * any other threads */
     } else if (!branch.isBubble()) {
         /* It's important that this is here to carry Fetch1 wakeups to Fetch1
          *  without overwriting them */
         DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old"
             " branch to complete\n");
     } else {
-        if (interrupted) {
-            if (inFlightInsts->empty()) {
-                DPRINTF(MinorInterrupt, "Waiting but no insts\n");
+        ThreadID commit_tid = getCommittingThread();
+
+        if (commit_tid != InvalidThreadID) {
+            ExecuteThreadInfo& commit_info = executeInfo[commit_tid];
+
+            DPRINTF(MinorExecute, "Attempting to commit [tid:%d]\n",
+                    commit_tid);
+            /* commit can set stalled flags observable to issue and so *must* be
+             *  called first */
+            if (commit_info.drainState != NotDraining) {
+                if (commit_info.drainState == DrainCurrentInst) {
+                    /* Commit only micro-ops, don't kill anything else */
+                    commit(commit_tid, true, false, branch);
+
+                    if (isInbetweenInsts(commit_tid))
+                        setDrainState(commit_tid, DrainHaltFetch);
+
+                    /* Discard any generated branch */
+                    branch = BranchData::bubble();
+                } else if (commit_info.drainState == DrainAllInsts) {
+                    /* Kill all instructions */
+                    while (getInput(commit_tid))
+                        popInput(commit_tid);
+                    commit(commit_tid, false, true, branch);
+                }
             } else {
-                DPRINTF(MinorInterrupt, "Waiting for end of inst before"
-                 " signalling interrupt\n");
+                /* Commit micro-ops only if interrupted.  Otherwise, commit
+                 *  anything you like */
+                DPRINTF(MinorExecute, "Committing micro-ops for interrupt[tid:%d]\n",
+                        commit_tid);
+                bool only_commit_microops = interrupted &&
+                                            hasInterrupt(commit_tid);
+                commit(commit_tid, only_commit_microops, false, branch);
             }
-        }
 
-        /* commit can set stalled flags observable to issue and so *must* be
-         *  called first */
-        if (drainState != NotDraining) {
-            if (drainState == DrainCurrentInst) {
-                /* Commit only micro-ops, don't kill anything else */
-                commit(true, false, branch);
-
-                if (isInbetweenInsts())
-                    setDrainState(DrainHaltFetch);
-
-                /* Discard any generated branch */
-                branch = BranchData::bubble();
-            } else if (drainState == DrainAllInsts) {
-                /* Kill all instructions */
-                while (getInput())
-                    popInput();
-                commit(false, true, branch);
+            /* Halt fetch, but don't do it until we have the current instruction in
+             *  the bag */
+            if (commit_info.drainState == DrainHaltFetch) {
+                updateBranchData(commit_tid, BranchData::HaltFetch,
+                        MinorDynInst::bubble(), TheISA::PCState(0), branch);
+
+                cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+                setDrainState(commit_tid, DrainAllInsts);
             }
-        } else {
-            /* Commit micro-ops only if interrupted.  Otherwise, commit
-             *  anything you like */
-            commit(interrupted, false, branch);
         }
-
+        ThreadID issue_tid = getIssuingThread();
         /* This will issue merrily even when interrupted in the sure and
          *  certain knowledge that the interrupt with change the stream */
-        if (insts_in)
-            num_issued = issue(false);
-    }
-
-    /* Halt fetch, but don't do it until we have the current instruction in
-     *  the bag */
-    if (drainState == DrainHaltFetch) {
-        updateBranchData(BranchData::HaltFetch, MinorDynInst::bubble(),
-            TheISA::PCState(0), branch);
+        if (issue_tid != InvalidThreadID) {
+            DPRINTF(MinorExecute, "Attempting to issue [tid:%d]\n",
+                    issue_tid);
+            num_issued = issue(issue_tid);
+        }
 
-        cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
-        setDrainState(DrainAllInsts);
     }
 
-    MinorDynInstPtr next_issuable_inst = NULL;
+    /* Run logic to step functional units + decide if we are active on the next
+     * clock cycle */
+    std::vector<MinorDynInstPtr> next_issuable_insts;
     bool can_issue_next = false;
 
-    /* Find the next issuable instruction and see if it can be issued */
-    if (getInput()) {
-        MinorDynInstPtr inst = getInput()->insts[inputIndex];
-
-        if (inst->isFault()) {
-            can_issue_next = true;
-        } else if (!inst->isBubble()) {
-            if (cpu.getContext(inst->id.threadId)->status() !=
-                ThreadContext::Suspended)
-            {
-                next_issuable_inst = inst;
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        /* Find the next issuable instruction for each thread and see if it can
+           be issued */
+        if (getInput(tid)) {
+            unsigned int input_index = executeInfo[tid].inputIndex;
+            MinorDynInstPtr inst = getInput(tid)->insts[input_index];
+            if (inst->isFault()) {
+                can_issue_next = true;
+            } else if (!inst->isBubble()) {
+                if (cpu.getContext(tid)->status() != ThreadContext::Suspended) {
+                    next_issuable_insts.push_back(inst);
+                }
             }
         }
     }
@@ -1494,53 +1495,56 @@ Execute::evaluate()
     bool becoming_stalled = true;
 
     /* Advance the pipelines and note whether they still need to be
-     *  advanced */
+     * advanced */
     for (unsigned int i = 0; i < numFuncUnits; i++) {
         FUPipeline *fu = funcUnits[i];
-
         fu->advance();
 
-        /* If we need to go again, the pipeline will have been left or set
-         *  to be unstalled */
-        if (fu->occupancy != 0 && !fu->stalled)
+        /* If we need to tick again, the pipeline will have been left or set
+         * to be unstalled */
+        if (fu->occupancy !=0 && !fu->stalled)
             becoming_stalled = false;
 
-        /* Could we possibly issue the next instruction?  This is quite
-         *  an expensive test */
-        if (next_issuable_inst && !fu->stalled &&
-            scoreboard.canInstIssue(next_issuable_inst,
-                NULL, NULL, cpu.curCycle() + Cycles(1),
-                cpu.getContext(next_issuable_inst->id.threadId)) &&
-            fu->provides(next_issuable_inst->staticInst->opClass()))
-        {
-            can_issue_next = true;
+        /* Could we possibly issue the next instruction from any thread?
+         * This is quite an expensive test and is only used to determine
+         * if the CPU should remain active; only run it if we aren't yet
+         * sure we are active next cycle */
+        for (auto inst : next_issuable_insts) {
+            if (!fu->stalled && fu->provides(inst->staticInst->opClass()) &&
+                scoreboard[inst->id.threadId].canInstIssue(inst,
+                    NULL, NULL, cpu.curCycle() + Cycles(1),
+                    cpu.getContext(inst->id.threadId))) {
+                can_issue_next = true;
+                break;
+            }
         }
     }
 
     bool head_inst_might_commit = false;
 
     /* Could the head in flight insts be committed */
-    if (!inFlightInsts->empty()) {
-        const QueuedInst &head_inst = inFlightInsts->front();
+    for (auto const &info : executeInfo) {
+        if (!info.inFlightInsts->empty()) {
+            const QueuedInst &head_inst = info.inFlightInsts->front();
 
-        if (head_inst.inst->isNoCostInst()) {
-            head_inst_might_commit = true;
-        } else {
-            FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
-
-            /* Head inst is commitable */
-            if ((fu->stalled &&
-                fu->front().inst->id == head_inst.inst->id) ||
-                lsq.findResponse(head_inst.inst))
-            {
+            if (head_inst.inst->isNoCostInst()) {
                 head_inst_might_commit = true;
+            } else {
+                FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
+                if ((fu->stalled &&
+                     fu->front().inst->id == head_inst.inst->id) ||
+                     lsq.findResponse(head_inst.inst))
+                {
+                    head_inst_might_commit = true;
+                    break;
+                }
             }
         }
     }
 
     DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n",
        (num_issued != 0 ? " (issued some insts)" : ""),
-       (becoming_stalled ? " (becoming stalled)" : "(not becoming stalled)"),
+       (becoming_stalled ? "(becoming stalled)" : "(not becoming stalled)"),
        (can_issue_next ? " (can issued next inst)" : ""),
        (head_inst_might_commit ? "(head inst might commit)" : ""),
        (lsq.needsToTick() ? " (LSQ needs to tick)" : ""),
@@ -1568,36 +1572,54 @@ Execute::evaluate()
         cpu.activityRecorder->activity();
 
     /* Make sure the input (if any left) is pushed */
-    inputBuffer.pushTail();
+    if (!inp.outputWire->isBubble())
+        inputBuffer[inp.outputWire->threadId].pushTail();
 }
 
-void
-Execute::wakeupFetch(BranchData::Reason reason)
+ThreadID
+Execute::checkInterrupts(BranchData& branch, bool& interrupted)
 {
-    BranchData branch;
-    assert(branch.isBubble());
-
-    /* THREAD thread id */
-    ThreadContext *thread = cpu.getContext(0);
-
-    /* Force a branch to the current PC (which should be the next inst.) to
-     *  wake up Fetch1 */
-    if (!branch.isStreamChange() /* No real branch already happened */) {
-        DPRINTF(MinorInterrupt, "Waking up Fetch (via Execute) by issuing"
-            " a branch: %s\n", thread->pcState());
+    ThreadID tid = interruptPriority;
+    /* Evaluate interrupts in round-robin based upon service */
+    do {
+        /* Has an interrupt been signalled?  This may not be acted on
+         *  straightaway so this is different from took_interrupt */
+        bool thread_interrupted = false;
+
+        if (FullSystem && cpu.getInterruptController(tid)) {
+            /* This is here because it seems that after drainResume the
+             * interrupt controller isn't always set */
+            thread_interrupted = executeInfo[tid].drainState == NotDraining &&
+                isInterrupted(tid);
+            interrupted = interrupted || thread_interrupted;
+        } else {
+            DPRINTF(MinorInterrupt, "No interrupt controller\n");
+        }
+        DPRINTF(MinorInterrupt, "[tid:%d] thread_interrupted?=%d isInbetweenInsts?=%d\n",
+                tid, thread_interrupted, isInbetweenInsts(tid));
+        /* Act on interrupts */
+        if (thread_interrupted && isInbetweenInsts(tid)) {
+            if (takeInterrupt(tid, branch)) {
+                interruptPriority = tid;
+                return tid;
+            }
+        } else {
+            tid = (tid + 1) % cpu.numThreads;
+        }
+    } while (tid != interruptPriority);
 
-        assert(thread->pcState().microPC() == 0);
+    return InvalidThreadID;
+}
 
-        updateBranchData(reason,
-            MinorDynInst::bubble(), thread->pcState(), branch);
-    } else {
-        DPRINTF(MinorInterrupt, "Already branching, no need for wakeup\n");
+bool
+Execute::hasInterrupt(ThreadID thread_id)
+{
+    if (FullSystem && cpu.getInterruptController(thread_id)) {
+        return executeInfo[thread_id].drainState == NotDraining &&
+               isInterrupted(thread_id);
     }
 
-    *out.inputWire = branch;
-
-    /* Make sure we get ticked */
-    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+    return false;
 }
 
 void
@@ -1606,10 +1628,10 @@ Execute::minorTrace() const
     std::ostringstream insts;
     std::ostringstream stalled;
 
-    instsBeingCommitted.reportData(insts);
+    executeInfo[0].instsBeingCommitted.reportData(insts);
     lsq.minorTrace();
-    inputBuffer.minorTrace();
-    scoreboard.minorTrace();
+    inputBuffer[0].minorTrace();
+    scoreboard[0].minorTrace();
 
     /* Report functional unit stalling in one string */
     unsigned int i = 0;
@@ -1623,14 +1645,110 @@ Execute::minorTrace() const
 
     MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d"
         " stalled=%s drainState=%d isInbetweenInsts=%d\n",
-        insts.str(), inputIndex, streamSeqNum, stalled.str(), drainState,
-        isInbetweenInsts());
+        insts.str(), executeInfo[0].inputIndex, executeInfo[0].streamSeqNum,
+        stalled.str(), executeInfo[0].drainState, isInbetweenInsts(0));
 
     std::for_each(funcUnits.begin(), funcUnits.end(),
         std::mem_fun(&FUPipeline::minorTrace));
 
-    inFlightInsts->minorTrace();
-    inFUMemInsts->minorTrace();
+    executeInfo[0].inFlightInsts->minorTrace();
+    executeInfo[0].inFUMemInsts->minorTrace();
+}
+
+inline ThreadID
+Execute::getCommittingThread()
+{
+    std::vector<ThreadID> priority_list;
+
+    switch (cpu.threadPolicy) {
+      case Enums::SingleThreaded:
+          return 0;
+      case Enums::RoundRobin:
+          priority_list = cpu.roundRobinPriority(commitPriority);
+          break;
+      case Enums::Random:
+          priority_list = cpu.randomPriority();
+          break;
+      default:
+          panic("Invalid thread policy");
+    }
+
+    for (auto tid : priority_list) {
+        ExecuteThreadInfo &ex_info = executeInfo[tid];
+        bool can_commit_insts = !ex_info.inFlightInsts->empty();
+        if (can_commit_insts) {
+            QueuedInst *head_inflight_inst = &(ex_info.inFlightInsts->front());
+            MinorDynInstPtr inst = head_inflight_inst->inst;
+
+            can_commit_insts = can_commit_insts &&
+                (!inst->inLSQ || (lsq.findResponse(inst) != NULL));
+
+            if (!inst->inLSQ) {
+                bool can_transfer_mem_inst = false;
+                if (!ex_info.inFUMemInsts->empty() && lsq.canRequest()) {
+                    const MinorDynInstPtr head_mem_ref_inst =
+                        ex_info.inFUMemInsts->front().inst;
+                    FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex];
+                    const MinorDynInstPtr &fu_inst = fu->front().inst;
+                    can_transfer_mem_inst =
+                        !fu_inst->isBubble() &&
+                         fu_inst->id.threadId == tid &&
+                         !fu_inst->inLSQ &&
+                         fu_inst->canEarlyIssue &&
+                         inst->id.execSeqNum > fu_inst->instToWaitFor;
+                }
+
+                bool can_execute_fu_inst = inst->fuIndex == noCostFUIndex;
+                if (can_commit_insts && !can_transfer_mem_inst &&
+                        inst->fuIndex != noCostFUIndex)
+                {
+                    QueuedInst& fu_inst = funcUnits[inst->fuIndex]->front();
+                    can_execute_fu_inst = !fu_inst.inst->isBubble() &&
+                        fu_inst.inst->id == inst->id;
+                }
+
+                can_commit_insts = can_commit_insts &&
+                    (can_transfer_mem_inst || can_execute_fu_inst);
+            }
+        }
+
+
+        if (can_commit_insts) {
+            commitPriority = tid;
+            return tid;
+        }
+    }
+
+    return InvalidThreadID;
+}
+
+inline ThreadID
+Execute::getIssuingThread()
+{
+    std::vector<ThreadID> priority_list;
+
+    switch (cpu.threadPolicy) {
+      case Enums::SingleThreaded:
+          return 0;
+      case Enums::RoundRobin:
+          priority_list = cpu.roundRobinPriority(issuePriority);
+          break;
+      case Enums::Random:
+          priority_list = cpu.randomPriority();
+          break;
+      default:
+          panic("Invalid thread scheduling policy.");
+    }
+
+    for (auto tid : priority_list) {
+        if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+            getInput(tid)) {
+            issuePriority = tid;
+            return tid;
+        }
+    }
+
+    return InvalidThreadID;
 }
 
 void
@@ -1638,11 +1756,10 @@ Execute::drainResume()
 {
     DPRINTF(Drain, "MinorExecute drainResume\n");
 
-    setDrainState(NotDraining);
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        setDrainState(tid, NotDraining);
+    }
 
-    /* Wakeup fetch and keep the pipeline running until that branch takes
-     *  effect */
-    wakeupFetch(BranchData::WakeupFetch);
     cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
 }
 
@@ -1671,10 +1788,10 @@ std::ostream &operator <<(std::ostream &os, Execute::DrainState state)
 }
 
 void
-Execute::setDrainState(DrainState state)
+Execute::setDrainState(ThreadID thread_id, DrainState state)
 {
-    DPRINTF(Drain, "setDrainState: %s\n", state);
-    drainState = state;
+    DPRINTF(Drain, "setDrainState[%d]: %s\n", thread_id, state);
+    executeInfo[thread_id].drainState = state;
 }
 
 unsigned int
@@ -1682,29 +1799,39 @@ Execute::drain()
 {
     DPRINTF(Drain, "MinorExecute drain\n");
 
-    if (drainState == NotDraining) {
-        cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        if (executeInfo[tid].drainState == NotDraining) {
+            cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
 
-        /* Go to DrainCurrentInst if we're between microops
-         * or waiting on an unbufferable memory operation.
-         * Otherwise we can go straight to DrainHaltFetch
-         */
-        if (isInbetweenInsts())
-            setDrainState(DrainHaltFetch);
-        else
-            setDrainState(DrainCurrentInst);
+            /* Go to DrainCurrentInst if we're between microops
+             * or waiting on an unbufferable memory operation.
+             * Otherwise we can go straight to DrainHaltFetch
+             */
+            if (isInbetweenInsts(tid))
+                setDrainState(tid, DrainHaltFetch);
+            else
+                setDrainState(tid, DrainCurrentInst);
+        }
     }
-
     return (isDrained() ? 0 : 1);
 }
 
 bool
 Execute::isDrained()
 {
-    return drainState == DrainAllInsts &&
-        inputBuffer.empty() &&
-        inFlightInsts->empty() &&
-        lsq.isDrained();
+    if (!lsq.isDrained())
+        return false;
+
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        if (executeInfo[tid].drainState != DrainAllInsts ||
+            !inputBuffer[tid].empty() ||
+            !executeInfo[tid].inFlightInsts->empty()) {
+
+            return false;
+        }
+    }
+
+    return true;
 }
 
 Execute::~Execute()
@@ -1712,13 +1839,14 @@ Execute::~Execute()
     for (unsigned int i = 0; i < numFuncUnits; i++)
         delete funcUnits[i];
 
-    delete inFlightInsts;
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
+        delete executeInfo[tid].inFlightInsts;
 }
 
 bool
 Execute::instIsRightStream(MinorDynInstPtr inst)
 {
-    return inst->id.streamSeqNum == streamSeqNum;
+    return inst->id.streamSeqNum == executeInfo[inst->id.threadId].streamSeqNum;
 }
 
 bool
@@ -1726,8 +1854,8 @@ Execute::instIsHeadInst(MinorDynInstPtr inst)
 {
     bool ret = false;
 
-    if (!inFlightInsts->empty())
-        ret = inFlightInsts->front().inst->id == inst->id;
+    if (!executeInfo[inst->id.threadId].inFlightInsts->empty())
+        ret = executeInfo[inst->id.threadId].inFlightInsts->front().inst->id == inst->id;
 
     return ret;
 }
index 8cd0265340060e2a7097e53e533af95f2f2640ab..165a5bae4631017eb5ba178c1b661c80e209732d 100644 (file)
@@ -116,13 +116,13 @@ class Execute : public Named
     LSQ lsq;
 
     /** Scoreboard of instruction dependencies */
-    Scoreboard scoreboard;
+    std::vector<Scoreboard> scoreboard;
 
     /** The execution functional units */
     std::vector<FUPipeline *> funcUnits;
 
   public: /* Public for Pipeline to be able to pass it to Decode */
-    InputBuffer<ForwardInstData> inputBuffer;
+    std::vector<InputBuffer<ForwardInstData>> inputBuffer;
 
   protected:
     /** Stage cycle-by-cycle state */
@@ -143,48 +143,75 @@ class Execute : public Named
         DrainAllInsts /* Discarding all remaining insts */
     };
 
-    /** In-order instructions either in FUs or the LSQ */
-    Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts;
-
-    /** Memory ref instructions still in the FUs */
-    Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts;
-
-    /** Index that we've completed upto in getInput data.  We can say we're
-     *  popInput when this equals getInput()->width() */
-    unsigned int inputIndex;
-
-    /** The last commit was the end of a full instruction so an interrupt
-     *  can safely happen */
-    bool lastCommitWasEndOfMacroop;
-
-    /** Structure for reporting insts currently being processed/retired
-     *  for MinorTrace */
-    ForwardInstData instsBeingCommitted;
-
-    /** Source of sequence number for instuction streams.  Increment this and
-     *  pass to fetch whenever an instruction stream needs to be changed.
-     *  For any more complicated behaviour (e.g. speculation) there'll need
-     *  to be another plan. THREAD, need one for each thread */
-    InstSeqNum streamSeqNum;
+    struct ExecuteThreadInfo {
+        /** Constructor */
+        ExecuteThreadInfo(unsigned int insts_committed) :
+            inputIndex(0),
+            lastCommitWasEndOfMacroop(true),
+            instsBeingCommitted(insts_committed),
+            streamSeqNum(InstId::firstStreamSeqNum),
+            lastPredictionSeqNum(InstId::firstPredictionSeqNum),
+            drainState(NotDraining)
+        { }
+
+        ExecuteThreadInfo(const ExecuteThreadInfo& other) :
+            inputIndex(other.inputIndex),
+            lastCommitWasEndOfMacroop(other.lastCommitWasEndOfMacroop),
+            instsBeingCommitted(other.instsBeingCommitted),
+            streamSeqNum(other.streamSeqNum),
+            lastPredictionSeqNum(other.lastPredictionSeqNum),
+            drainState(other.drainState)
+        { }
+
+        /** In-order instructions either in FUs or the LSQ */
+        Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts;
+
+        /** Memory ref instructions still in the FUs */
+        Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts;
+
+        /** Index that we've completed up to in getInput data.  We can say we're
+         *  popInput when this equals getInput()->width() */
+        unsigned int inputIndex;
+
+        /** The last commit was the end of a full instruction so an interrupt
+         *  can safely happen */
+        bool lastCommitWasEndOfMacroop;
+
+        /** Structure for reporting insts currently being processed/retired
+         *  for MinorTrace */
+        ForwardInstData instsBeingCommitted;
+
+        /** Source of sequence number for instruction streams.  Increment this and
+         *  pass to fetch whenever an instruction stream needs to be changed.
+         *  For any more complicated behaviour (e.g. speculation) there'll need
+         *  to be another plan. */
+        InstSeqNum streamSeqNum;
+
+        /** A prediction number for use where one isn't available from an
+         *  instruction.  This is harvested from committed instructions.
+         *  This isn't really needed as the streamSeqNum will change on
+         *  a branch, but it minimises disruption in stream identification */
+        InstSeqNum lastPredictionSeqNum;
+
+        /** State progression for draining NotDraining -> ... -> DrainAllInsts */
+        DrainState drainState;
+    };
 
-    /** A prediction number for use where one isn't available from an
-     *  instruction.  This is harvested from committed instructions.
-     *  This isn't really needed as the streamSeqNum will change on
-     *  a branch, but it minimises disruption in stream identification */
-    InstSeqNum lastPredictionSeqNum;
+    std::vector<ExecuteThreadInfo> executeInfo;
 
-    /** State progression for draining NotDraining -> ... -> DrainAllInsts */
-    DrainState drainState;
+    ThreadID interruptPriority;
+    ThreadID issuePriority;
+    ThreadID commitPriority;
 
   protected:
     friend std::ostream &operator <<(std::ostream &os, DrainState state);
 
     /** Get a piece of data to work on from the inputBuffer, or 0 if there
      *  is no data. */
-    const ForwardInstData *getInput();
+    const ForwardInstData *getInput(ThreadID tid);
 
     /** Pop an element off the input buffer, if there are any */
-    void popInput();
+    void popInput(ThreadID tid);
 
     /** Generate Branch data based (into branch) on an observed (or not)
      *  change in PC while executing an instruction.
@@ -193,7 +220,7 @@ class Execute : public Named
 
     /** Actually create a branch to communicate to Fetch1/Fetch2 and,
      *  if that is a stream-changing branch update the streamSeqNum */
-    void updateBranchData(BranchData::Reason reason,
+    void updateBranchData(ThreadID tid, BranchData::Reason reason,
         MinorDynInstPtr inst, const TheISA::PCState &target,
         BranchData &branch);
 
@@ -224,23 +251,32 @@ class Execute : public Named
     bool isInterrupted(ThreadID thread_id) const;
 
     /** Are we between instructions?  Can we be interrupted? */
-    bool isInbetweenInsts() const;
+    bool isInbetweenInsts(ThreadID thread_id) const;
 
     /** Act on an interrupt.  Returns true if an interrupt was actually
      *  signalled and invoked */
     bool takeInterrupt(ThreadID thread_id, BranchData &branch);
 
     /** Try and issue instructions from the inputBuffer */
-    unsigned int issue(bool only_issue_microops);
+    unsigned int issue(ThreadID thread_id);
 
     /** Try to act on PC-related events.  Returns true if any were
      *  executed */
-    bool tryPCEvents();
+    bool tryPCEvents(ThreadID thread_id);
 
     /** Do the stats handling and instruction count and PC event events
      *  related to the new instruction/op counts */
     void doInstCommitAccounting(MinorDynInstPtr inst);
 
+    /** Check all threads for possible interrupts. If interrupt is taken,
+     *  returns the tid of the thread.  interrupted is set if any thread
+     *  has an interrupt, irrespective of if it is taken */
+    ThreadID checkInterrupts(BranchData& branch, bool& interrupted);
+
+    /** Checks if a specific thread has an interrupt.  No action is taken.
+     *  This is used for determining if a thread should only commit microops */
+    bool hasInterrupt(ThreadID thread_id);
+
     /** Commit a single instruction.  Returns true if the instruction being
      *  examined was completed (fully executed, discarded, or initiated a
      *  memory access), false if there is still some processing to do.
@@ -266,10 +302,16 @@ class Execute : public Named
      *  If discard is true then discard all instructions rather than
      *  committing.
      *  branch is set to any branch raised during commit. */
-    void commit(bool only_commit_microops, bool discard, BranchData &branch);
+    void commit(ThreadID thread_id, bool only_commit_microops, bool discard,
+        BranchData &branch);
 
     /** Set the drain state (with useful debugging messages) */
-    void setDrainState(DrainState state);
+    void setDrainState(ThreadID thread_id, DrainState state);
+
+    /** Use the current threading policy to determine the next thread to
+     *  commit or issue from. */
+    ThreadID getCommittingThread();
+    ThreadID getIssuingThread();
 
   public:
     Execute(const std::string &name_,
@@ -282,12 +324,6 @@ class Execute : public Named
 
   public:
 
-    /** Cause Execute to issue an UnpredictedBranch (or WakeupFetch if
-     *  that was passed as the reason) to Fetch1 to wake the
-     *  system up (using the PC from the thread context). */
-    void wakeupFetch(BranchData::Reason reason =
-        BranchData::UnpredictedBranch);
-
     /** Returns the DcachePort owned by this Execute to pass upwards */
     MinorCPU::MinorCPUPort &getDcachePort();
 
index d19d7b042657f52212990c2338ae909446efce48..f4f1205349c8eb9d20e2c003077021fe43c72365 100644 (file)
@@ -57,7 +57,7 @@ Fetch1::Fetch1(const std::string &name_,
     Latch<BranchData>::Output inp_,
     Latch<ForwardLineData>::Input out_,
     Latch<BranchData>::Output prediction_,
-    Reservable &next_stage_input_buffer) :
+    std::vector<InputBuffer<ForwardLineData>> &next_stage_input_buffer) :
     Named(name_),
     cpu(cpu_),
     inp(inp_),
@@ -68,11 +68,8 @@ Fetch1::Fetch1(const std::string &name_,
     lineSnap(params.fetch1LineSnapWidth),
     maxLineWidth(params.fetch1LineWidth),
     fetchLimit(params.fetch1FetchLimit),
-    state(FetchWaitingForPC),
-    pc(0),
-    streamSeqNum(InstId::firstStreamSeqNum),
-    predictionSeqNum(InstId::firstPredictionSeqNum),
-    blocked(false),
+    fetchInfo(params.numThreads),
+    threadPriority(0),
     requests(name_ + ".requests", "lines", params.fetch1FetchLimit),
     transfers(name_ + ".transfers", "lines", params.fetch1FetchLimit),
     icacheState(IcacheRunning),
@@ -114,32 +111,67 @@ Fetch1::Fetch1(const std::string &name_,
     }
 }
 
+inline ThreadID
+Fetch1::getScheduledThread()
+{
+    /* Select thread via policy. */
+    std::vector<ThreadID> priority_list;
+
+    switch (cpu.threadPolicy) {
+      case Enums::SingleThreaded:
+        priority_list.push_back(0);
+        break;
+      case Enums::RoundRobin:
+        priority_list = cpu.roundRobinPriority(threadPriority);
+        break;
+      case Enums::Random:
+        priority_list = cpu.randomPriority();
+        break;
+      default:
+        panic("Unknown fetch policy");
+    }
+
+    for (auto tid : priority_list) {
+        if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+            !fetchInfo[tid].blocked &&
+            fetchInfo[tid].state == FetchRunning) {
+            threadPriority = tid;
+            return tid;
+        }
+    }
+
+   return InvalidThreadID;
+}
+
 void
-Fetch1::fetchLine()
+Fetch1::fetchLine(ThreadID tid)
 {
+    /* Reference the currently used thread state. */
+    Fetch1ThreadInfo &thread = fetchInfo[tid];
+
     /* If line_offset != 0, a request is pushed for the remainder of the
      * line. */
     /* Use a lower, sizeof(MachInst) aligned address for the fetch */
-    Addr aligned_pc = pc.instAddr() & ~((Addr) lineSnap - 1);
+    Addr aligned_pc = thread.pc.instAddr() & ~((Addr) lineSnap - 1);
     unsigned int line_offset = aligned_pc % lineSnap;
     unsigned int request_size = maxLineWidth - line_offset;
 
     /* Fill in the line's id */
-    InstId request_id(0 /* thread */,
-        streamSeqNum, predictionSeqNum,
+    InstId request_id(tid,
+        thread.streamSeqNum, thread.predictionSeqNum,
         lineSeqNum);
 
-    FetchRequestPtr request = new FetchRequest(*this, request_id, pc);
+    FetchRequestPtr request = new FetchRequest(*this, request_id, thread.pc);
 
     DPRINTF(Fetch, "Inserting fetch into the fetch queue "
         "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
-        request_id, aligned_pc, pc, line_offset, request_size);
+        request_id, aligned_pc, thread.pc, line_offset, request_size);
 
-    request->request.setContext(cpu.threads[0]->getTC()->contextId());
+    request->request.setContext(cpu.threads[tid]->getTC()->contextId());
     request->request.setVirt(0 /* asid */,
         aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(),
         /* I've no idea why we need the PC, but give it */
-        pc.instAddr());
+        thread.pc.instAddr());
 
     DPRINTF(Fetch, "Submitting ITLB request\n");
     numFetchesInITLB++;
@@ -165,12 +197,12 @@ Fetch1::fetchLine()
      * reliable 'new' PC if the next line has a new stream sequence number. */
 #if THE_ISA == ALPHA_ISA
     /* Restore the low bits of the PC used as address space flags */
-    Addr pc_low_bits = pc.instAddr() &
+    Addr pc_low_bits = thread.pc.instAddr() &
         ((Addr) (1 << sizeof(TheISA::MachInst)) - 1);
 
-    pc.set(aligned_pc + request_size + pc_low_bits);
+    thread.pc.set(aligned_pc + request_size + pc_low_bits);
 #else
-    pc.set(aligned_pc + request_size);
+    thread.pc.set(aligned_pc + request_size);
 #endif
 }
 
@@ -454,46 +486,58 @@ operator <<(std::ostream &os, Fetch1::FetchState state)
 void
 Fetch1::changeStream(const BranchData &branch)
 {
+    Fetch1ThreadInfo &thread = fetchInfo[branch.threadId];
+
     updateExpectedSeqNums(branch);
 
     /* Start fetching again if we were stopped */
     switch (branch.reason) {
       case BranchData::SuspendThread:
-        DPRINTF(Fetch, "Suspending fetch: %s\n", branch);
-        state = FetchWaitingForPC;
+        {
+            if (thread.wakeupGuard) {
+                DPRINTF(Fetch, "Not suspending fetch due to guard: %s\n",
+                                branch);
+            } else {
+                DPRINTF(Fetch, "Suspending fetch: %s\n", branch);
+                thread.state = FetchWaitingForPC;
+            }
+        }
         break;
       case BranchData::HaltFetch:
         DPRINTF(Fetch, "Halting fetch\n");
-        state = FetchHalted;
+        thread.state = FetchHalted;
         break;
       default:
         DPRINTF(Fetch, "Changing stream on branch: %s\n", branch);
-        state = FetchRunning;
+        thread.state = FetchRunning;
         break;
     }
-    pc = branch.target;
+    thread.pc = branch.target;
 }
 
 void
 Fetch1::updateExpectedSeqNums(const BranchData &branch)
 {
+    Fetch1ThreadInfo &thread = fetchInfo[branch.threadId];
+
     DPRINTF(Fetch, "Updating streamSeqNum from: %d to %d,"
         " predictionSeqNum from: %d to %d\n",
-        streamSeqNum, branch.newStreamSeqNum,
-        predictionSeqNum, branch.newPredictionSeqNum);
+        thread.streamSeqNum, branch.newStreamSeqNum,
+        thread.predictionSeqNum, branch.newPredictionSeqNum);
 
     /* Change the stream */
-    streamSeqNum = branch.newStreamSeqNum;
+    thread.streamSeqNum = branch.newStreamSeqNum;
     /* Update the prediction.  Note that it's possible for this to
      *  actually set the prediction to an *older* value if new
      *  predictions have been discarded by execute */
-    predictionSeqNum = branch.newPredictionSeqNum;
+    thread.predictionSeqNum = branch.newPredictionSeqNum;
 }
 
 void
 Fetch1::processResponse(Fetch1::FetchRequestPtr response,
     ForwardLineData &line)
 {
+    Fetch1ThreadInfo &thread = fetchInfo[response->id.threadId];
     PacketPtr packet = response->packet;
 
     /* Pass the prefetch abort (if any) on to Fetch2 in a ForwardLineData
@@ -514,7 +558,7 @@ Fetch1::processResponse(Fetch1::FetchRequestPtr response,
          * can't (currently) selectively remove this stream from the queues */
         DPRINTF(Fetch, "Stopping line fetch because of fault: %s\n",
             response->fault->name());
-        state = Fetch1::FetchWaitingForPC;
+        thread.state = Fetch1::FetchWaitingForPC;
     } else {
         line.adoptPacketData(packet);
         /* Null the response's packet to prevent the response from trying to
@@ -532,61 +576,86 @@ Fetch1::evaluate()
 
     assert(line_out.isBubble());
 
-    blocked = !nextStageReserve.canReserve();
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++)
+        fetchInfo[tid].blocked = !nextStageReserve[tid].canReserve();
 
-    /* Are we changing stream?  Look to the Execute branches first, then
-     * to predicted changes of stream from Fetch2 */
-    /* @todo, find better way to express ignoring branch predictions */
-    if (execute_branch.isStreamChange() &&
-        execute_branch.reason != BranchData::BranchPrediction)
-    {
-        if (state == FetchHalted) {
-            if (execute_branch.reason == BranchData::WakeupFetch) {
-                DPRINTF(Fetch, "Waking up fetch: %s\n", execute_branch);
+    /** Are both branches from later stages valid and for the same thread? */
+    if (execute_branch.threadId != InvalidThreadID &&
+        execute_branch.threadId == fetch2_branch.threadId) {
+
+        Fetch1ThreadInfo &thread = fetchInfo[execute_branch.threadId];
+
+        /* Are we changing stream?  Look to the Execute branches first, then
+         * to predicted changes of stream from Fetch2 */
+        if (execute_branch.isStreamChange()) {
+            if (thread.state == FetchHalted) {
+                DPRINTF(Fetch, "Halted, ignoring branch: %s\n", execute_branch);
+            } else {
                 changeStream(execute_branch);
+            }
+
+            if (!fetch2_branch.isBubble()) {
+                DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n",
+                    fetch2_branch);
+            }
+
+            /* The streamSeqNum tagging in request/response ->req should handle
+             *  discarding those requests when we get to them. */
+        } else if (thread.state != FetchHalted && fetch2_branch.isStreamChange()) {
+            /* Handle branch predictions by changing the instruction source
+             * if we're still processing the same stream (as set by streamSeqNum)
+             * as the one of the prediction.
+             */
+            if (fetch2_branch.newStreamSeqNum != thread.streamSeqNum) {
+                DPRINTF(Fetch, "Not changing stream on prediction: %s,"
+                    " streamSeqNum mismatch\n",
+                    fetch2_branch);
             } else {
-                DPRINTF(Fetch, "Halted, ignoring branch: %s\n",
-                    execute_branch);
+                changeStream(fetch2_branch);
             }
-        } else {
-            changeStream(execute_branch);
         }
+    } else {
+        /* Fetch2 and Execute branches are for different threads */
+        if (execute_branch.threadId != InvalidThreadID &&
+            execute_branch.isStreamChange()) {
 
-        if (!fetch2_branch.isBubble()) {
-            DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n",
-                fetch2_branch);
+            if (fetchInfo[execute_branch.threadId].state == FetchHalted) {
+                DPRINTF(Fetch, "Halted, ignoring branch: %s\n", execute_branch);
+            } else {
+                changeStream(execute_branch);
+            }
         }
 
-        /* The streamSeqNum tagging in request/response ->req should handle
-         *  discarding those requests when we get to them. */
-    } else if (state != FetchHalted && fetch2_branch.isStreamChange()) {
-        /* Handle branch predictions by changing the instruction source
-         * if we're still processing the same stream (as set by streamSeqNum)
-         * as the one of the prediction.
-         */
-        if (fetch2_branch.newStreamSeqNum != streamSeqNum) {
-            DPRINTF(Fetch, "Not changing stream on prediction: %s,"
-                " streamSeqNum mismatch\n",
-                fetch2_branch);
-        } else {
-            changeStream(fetch2_branch);
+        if (fetch2_branch.threadId != InvalidThreadID &&
+            fetch2_branch.isStreamChange()) {
+
+            if (fetchInfo[fetch2_branch.threadId].state == FetchHalted) {
+                DPRINTF(Fetch, "Halted, ignoring branch: %s\n", fetch2_branch);
+            } else if (fetch2_branch.newStreamSeqNum != fetchInfo[fetch2_branch.threadId].streamSeqNum) {
+                DPRINTF(Fetch, "Not changing stream on prediction: %s,"
+                    " streamSeqNum mismatch\n", fetch2_branch);
+            } else {
+                changeStream(fetch2_branch);
+            }
         }
     }
 
-    /* Can we fetch? */
-    /* The bare minimum requirements for initiating a fetch */
-    /* THREAD need to handle multiple threads */
-    if (state == FetchRunning && /* We are actually fetching */
-        !blocked && /* Space in the Fetch2 inputBuffer */
-        /* The thread we're going to fetch for (thread 0), is active */
-        cpu.getContext(0)->status() == ThreadContext::Active &&
-        numInFlightFetches() < fetchLimit)
-    {
-        fetchLine();
-        /* Take up a slot in the fetch queue */
-        nextStageReserve.reserve();
+    if (numInFlightFetches() < fetchLimit) {
+        ThreadID fetch_tid = getScheduledThread();
+
+        if (fetch_tid != InvalidThreadID) {
+            DPRINTF(Fetch, "Fetching from thread %d\n", fetch_tid);
+
+            /* Generate fetch to selected thread */
+            fetchLine(fetch_tid);
+            /* Take up a slot in the fetch queue */
+            nextStageReserve[fetch_tid].reserve();
+        } else {
+            DPRINTF(Fetch, "No active threads available to fetch from\n");
+        }
     }
 
+
     /* Halting shouldn't prevent fetches in flight from being processed */
     /* Step fetches through the icachePort queues and memory system */
     stepQueues();
@@ -599,9 +668,9 @@ Fetch1::evaluate()
         Fetch1::FetchRequestPtr response = transfers.front();
 
         if (response->isDiscardable()) {
-            nextStageReserve.freeReservation();
+            nextStageReserve[response->id.threadId].freeReservation();
 
-            DPRINTF(Fetch, "Discarding translated fetch at it's for"
+            DPRINTF(Fetch, "Discarding translated fetch as it's for"
                 " an old stream\n");
 
             /* Wake up next cycle just in case there was some other
@@ -626,19 +695,49 @@ Fetch1::evaluate()
      *  generate a line output (tested just above) or to initiate a memory
      *  fetch which will signal activity when it returns/needs stepping
      *  between queues */
+
+
+    /* This looks hackish.  And it is, but there doesn't seem to be a better
+     * way to do this.  The signal from commit to suspend fetch takes 1
+     * clock cycle to propagate to fetch.  However, a legitimate wakeup
+     * may occur between cycles from the memory system.  Thus wakeup guard
+     * prevents us from suspending in that case. */
+
+    for (auto& thread : fetchInfo) {
+        thread.wakeupGuard = false;
+    }
+}
+
+void
+Fetch1::wakeupFetch(ThreadID tid)
+{
+    ThreadContext *thread_ctx = cpu.getContext(tid);
+    Fetch1ThreadInfo &thread = fetchInfo[tid];
+    thread.pc = thread_ctx->pcState();
+    thread.state = FetchRunning;
+    thread.wakeupGuard = true;
+    DPRINTF(Fetch, "[tid:%d]: Changing stream wakeup %s\n",
+            tid, thread_ctx->pcState());
+
+    cpu.wakeupOnEvent(Pipeline::Fetch1StageId);
 }
 
 bool
 Fetch1::isDrained()
 {
-    DPRINTF(Drain, "isDrained %s %s%s\n",
-        state,
-        (numInFlightFetches() == 0 ? "" : "inFlightFetches "),
-        ((*out.inputWire).isBubble() ? "" : "outputtingLine"));
-
-    return state == FetchHalted &&
-        numInFlightFetches() == 0 &&
-        (*out.inputWire).isBubble();
+    bool drained = numInFlightFetches() == 0 && (*out.inputWire).isBubble();
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        Fetch1ThreadInfo &thread = fetchInfo[tid];
+        DPRINTF(Drain, "isDrained[tid:%d]: %s %s%s\n",
+                tid,
+                thread.state == FetchHalted,
+                (numInFlightFetches() == 0 ? "" : "inFlightFetches "),
+                ((*out.inputWire).isBubble() ? "" : "outputtingLine"));
+
+        drained = drained && thread.state == FetchHalted;
+    }
+
+    return drained;
 }
 
 void
@@ -649,26 +748,32 @@ Fetch1::FetchRequest::reportData(std::ostream &os) const
 
 bool Fetch1::FetchRequest::isDiscardable() const
 {
+    Fetch1ThreadInfo &thread = fetch.fetchInfo[id.threadId];
+
     /* Can't discard lines in TLB/memory */
     return state != InTranslation && state != RequestIssuing &&
-        (id.streamSeqNum != fetch.streamSeqNum ||
-        id.predictionSeqNum != fetch.predictionSeqNum);
+        (id.streamSeqNum != thread.streamSeqNum ||
+        id.predictionSeqNum != thread.predictionSeqNum);
 }
 
 void
 Fetch1::minorTrace() const
 {
+    // TODO: minorTrace does not yet understand SMT; until it does,
+    // report only thread 0's state here.
+    const Fetch1ThreadInfo &thread = fetchInfo[0];
+
     std::ostringstream data;
 
-    if (blocked)
+    if (thread.blocked)
         data << 'B';
     else
         (*out.inputWire).reportData(data);
 
     MINORTRACE("state=%s icacheState=%s in_tlb_mem=%s/%s"
-        " streamSeqNum=%d lines=%s\n", state, icacheState,
+        " streamSeqNum=%d lines=%s\n", thread.state, icacheState,
         numFetchesInITLB, numFetchesInMemorySystem,
-        streamSeqNum, data.str());
+        thread.streamSeqNum, data.str());
     requests.minorTrace();
     transfers.minorTrace();
 }
index d4a35c4687bd0a8eba6324fbed34735cb0d0b813..cf6c9d254c1e3c92de18759c7daa905dbed69b7a 100644 (file)
@@ -197,7 +197,7 @@ class Fetch1 : public Named
     Latch<BranchData>::Output prediction;
 
     /** Interface to reserve space in the next stage */
-    Reservable &nextStageReserve;
+    std::vector<InputBuffer<ForwardLineData>> &nextStageReserve;
 
     /** IcachePort to pass to the CPU.  Fetch1 is the only module that uses
      *  it. */
@@ -233,26 +233,53 @@ class Fetch1 : public Named
 
     /** Stage cycle-by-cycle state */
 
-    FetchState state;
+    struct Fetch1ThreadInfo {
 
-    /** Fetch PC value. This is updated by branches from Execute, branch
-     *  prediction targets from Fetch2 and by incrementing it as we fetch
-     *  lines subsequent to those two sources. */
-    TheISA::PCState pc;
+        /** Constructor to initialize all fields. */
+        Fetch1ThreadInfo() :
+            state(FetchWaitingForPC),
+            pc(TheISA::PCState(0)),
+            streamSeqNum(InstId::firstStreamSeqNum),
+            predictionSeqNum(InstId::firstPredictionSeqNum),
+            blocked(false),
+            wakeupGuard(false)
+        { }
+
+        Fetch1ThreadInfo(const Fetch1ThreadInfo& other) :
+            state(other.state),
+            pc(other.pc),
+            streamSeqNum(other.streamSeqNum),
+            predictionSeqNum(other.predictionSeqNum),
+            blocked(other.blocked), wakeupGuard(other.wakeupGuard)
+        { }
+
+        FetchState state;
+
+        /** Fetch PC value. This is updated by branches from Execute, branch
+         *  prediction targets from Fetch2 and by incrementing it as we fetch
+         *  lines subsequent to those two sources. */
+        TheISA::PCState pc;
 
-    /** Stream sequence number.  This changes on request from Execute and is
-     *  used to tag instructions by the fetch stream to which they belong.
-     *  Execute originates new prediction sequence numbers. */
-    InstSeqNum streamSeqNum;
+        /** Stream sequence number.  This changes on request from Execute and is
+         *  used to tag instructions by the fetch stream to which they belong.
+         *  Execute originates new prediction sequence numbers. */
+        InstSeqNum streamSeqNum;
 
-    /** Prediction sequence number.  This changes when requests from Execute
-     *  or Fetch2 ask for a change of fetch address and is used to tag lines
-     *  by the prediction to which they belong.  Fetch2 originates
-     *  prediction sequence numbers. */
-    InstSeqNum predictionSeqNum;
+        /** Prediction sequence number.  This changes when requests from Execute
+         *  or Fetch2 ask for a change of fetch address and is used to tag lines
+         *  by the prediction to which they belong.  Fetch2 originates
+         *  prediction sequence numbers. */
+        InstSeqNum predictionSeqNum;
 
-    /** Blocked indication for report */
-    bool blocked;
+        /** Blocked indication for report */
+        bool blocked;
+
+        /** Signal to guard against sleeping first cycle of wakeup */
+        bool wakeupGuard;
+    };
+
+    std::vector<Fetch1ThreadInfo> fetchInfo;
+    ThreadID threadPriority;
 
     /** State of memory access for head instruction fetch */
     enum IcacheState
@@ -307,10 +334,15 @@ class Fetch1 : public Named
     friend std::ostream &operator <<(std::ostream &os,
         IcacheState state);
 
+
+    /** Use the current threading policy to determine the next thread to
+     *  fetch from. */
+    ThreadID getScheduledThread();
+
     /** Insert a line fetch into the requests.  This can be a partial
      *  line request where the given address has a non-0 offset into a
      *  line. */
-    void fetchLine();
+    void fetchLine(ThreadID tid);
 
     /** Try and issue a fetch for a translated request at the
      *  head of the requests queue.  Also tries to move the request
@@ -354,7 +386,7 @@ class Fetch1 : public Named
         Latch<BranchData>::Output inp_,
         Latch<ForwardLineData>::Input out_,
         Latch<BranchData>::Output prediction_,
-        Reservable &next_stage_input_buffer);
+        std::vector<InputBuffer<ForwardLineData>> &next_stage_input_buffer);
 
   public:
     /** Returns the IcachePort owned by this Fetch1 */
@@ -363,6 +395,9 @@ class Fetch1 : public Named
     /** Pass on input/buffer data to the output if you can */
     void evaluate();
 
+    /** Initiate fetch1 fetching */
+    void wakeupFetch(ThreadID tid);
+
     void minorTrace() const;
 
     /** Is this stage drained?  For Fetch1, draining is initiated by
index cb45f16e3e6ca0070ca5aaf37bcef3888d09aded..ae02b1c229e094fcae82353ad6aecafaf24664b7 100644 (file)
@@ -58,7 +58,7 @@ Fetch2::Fetch2(const std::string &name,
     Latch<BranchData>::Output branchInp_,
     Latch<BranchData>::Input predictionOut_,
     Latch<ForwardInstData>::Input out_,
-    Reservable &next_stage_input_buffer) :
+    std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer) :
     Named(name),
     cpu(cpu_),
     inp(inp_),
@@ -69,15 +69,8 @@ Fetch2::Fetch2(const std::string &name,
     outputWidth(params.decodeInputWidth),
     processMoreThanOneInput(params.fetch2CycleInput),
     branchPredictor(*params.branchPred),
-    inputBuffer(name + ".inputBuffer", "lines", params.fetch2InputBufferSize),
-    inputIndex(0),
-    pc(TheISA::PCState(0)),
-    havePC(false),
-    lastStreamSeqNum(InstId::firstStreamSeqNum),
-    fetchSeqNum(InstId::firstFetchSeqNum),
-    expectedStreamSeqNum(InstId::firstStreamSeqNum),
-    predictionSeqNum(InstId::firstPredictionSeqNum),
-    blocked(false)
+    fetchInfo(params.numThreads),
+    threadPriority(0)
 {
     if (outputWidth < 1)
         fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth);
@@ -86,38 +79,46 @@ Fetch2::Fetch2(const std::string &name,
         fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name,
         params.fetch2InputBufferSize);
     }
+
+    /* Per-thread input buffers */
+    for (ThreadID tid = 0; tid < params.numThreads; tid++) {
+        inputBuffer.push_back(
+            InputBuffer<ForwardLineData>(
+                name + ".inputBuffer" + std::to_string(tid), "lines",
+                params.fetch2InputBufferSize));
+    }
 }
 
 const ForwardLineData *
-Fetch2::getInput()
+Fetch2::getInput(ThreadID tid)
 {
     /* Get a line from the inputBuffer to work with */
-    if (!inputBuffer.empty()) {
-        return &(inputBuffer.front());
+    if (!inputBuffer[tid].empty()) {
+        return &(inputBuffer[tid].front());
     } else {
         return NULL;
     }
 }
 
 void
-Fetch2::popInput()
+Fetch2::popInput(ThreadID tid)
 {
-    if (!inputBuffer.empty()) {
-        inputBuffer.front().freeLine();
-        inputBuffer.pop();
+    if (!inputBuffer[tid].empty()) {
+        inputBuffer[tid].front().freeLine();
+        inputBuffer[tid].pop();
     }
 
-    inputIndex = 0;
+    fetchInfo[tid].inputIndex = 0;
 }
 
 void
-Fetch2::dumpAllInput()
+Fetch2::dumpAllInput(ThreadID tid)
 {
     DPRINTF(Fetch, "Dumping whole input buffer\n");
-    while (!inputBuffer.empty())
-        popInput();
+    while (!inputBuffer[tid].empty())
+        popInput(tid);
 
-    inputIndex = 0;
+    fetchInfo[tid].inputIndex = 0;
 }
 
 void
@@ -139,9 +140,6 @@ Fetch2::updateBranchPrediction(const BranchData &branch)
       case BranchData::SuspendThread:
         /* Don't need to act on suspends */
         break;
-      case BranchData::WakeupFetch:
-        /* Don't need to act on wakeups, no instruction tied to action. */
-        break;
       case BranchData::HaltFetch:
         /* Don't need to act on fetch wakeup */
         break;
@@ -180,6 +178,7 @@ Fetch2::updateBranchPrediction(const BranchData &branch)
 void
 Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
 {
+    Fetch2ThreadInfo &thread = fetchInfo[inst->id.threadId];
     TheISA::PCState inst_pc = inst->pc;
 
     assert(!inst->predictedTaken);
@@ -209,35 +208,37 @@ Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch)
     if (inst->predictedTaken) {
         /* Update the predictionSeqNum and remember the streamSeqNum that it
          *  was associated with */
-        expectedStreamSeqNum = inst->id.streamSeqNum;
+        thread.expectedStreamSeqNum = inst->id.streamSeqNum;
 
         BranchData new_branch = BranchData(BranchData::BranchPrediction,
-            inst->id.streamSeqNum, predictionSeqNum + 1,
+            inst->id.threadId,
+            inst->id.streamSeqNum, thread.predictionSeqNum + 1,
             inst->predictedTarget, inst);
 
         /* Mark with a new prediction number by the stream number of the
          *  instruction causing the prediction */
-        predictionSeqNum++;
+        thread.predictionSeqNum++;
         branch = new_branch;
 
         DPRINTF(Branch, "Branch predicted taken inst: %s target: %s"
             " new predictionSeqNum: %d\n",
-            *inst, inst->predictedTarget, predictionSeqNum);
+            *inst, inst->predictedTarget, thread.predictionSeqNum);
     }
 }
 
 void
 Fetch2::evaluate()
 {
-    inputBuffer.setTail(*inp.outputWire);
+    /* Push input onto appropriate input buffer */
+    if (!inp.outputWire->isBubble())
+        inputBuffer[inp.outputWire->id.threadId].setTail(*inp.outputWire);
+
     ForwardInstData &insts_out = *out.inputWire;
     BranchData prediction;
     BranchData &branch_inp = *branchInp.outputWire;
 
     assert(insts_out.isBubble());
 
-    blocked = false;
-
     /* React to branches from Execute to update local branch prediction
      *  structures */
     updateBranchPrediction(branch_inp);
@@ -247,39 +248,48 @@ Fetch2::evaluate()
     if (branch_inp.isStreamChange()) {
         DPRINTF(Fetch, "Dumping all input as a stream changing branch"
             " has arrived\n");
-        dumpAllInput();
-        havePC = false;
+        dumpAllInput(branch_inp.threadId);
+        fetchInfo[branch_inp.threadId].havePC = false;
     }
 
+    assert(insts_out.isBubble());
     /* Even when blocked, clear out input lines with the wrong
      *  prediction sequence number */
-    {
-        const ForwardLineData *line_in = getInput();
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        Fetch2ThreadInfo &thread = fetchInfo[tid];
+
+        thread.blocked = !nextStageReserve[tid].canReserve();
+
+        const ForwardLineData *line_in = getInput(tid);
 
         while (line_in &&
-            expectedStreamSeqNum == line_in->id.streamSeqNum &&
-            predictionSeqNum != line_in->id.predictionSeqNum)
+            thread.expectedStreamSeqNum == line_in->id.streamSeqNum &&
+            thread.predictionSeqNum != line_in->id.predictionSeqNum)
         {
             DPRINTF(Fetch, "Discarding line %s"
                 " due to predictionSeqNum mismatch (expected: %d)\n",
-                line_in->id, predictionSeqNum);
+                line_in->id, thread.predictionSeqNum);
 
-            popInput();
-            havePC = false;
+            popInput(tid);
+            fetchInfo[tid].havePC = false;
 
             if (processMoreThanOneInput) {
                 DPRINTF(Fetch, "Wrapping\n");
-                line_in = getInput();
+                line_in = getInput(tid);
             } else {
                 line_in = NULL;
             }
         }
     }
 
-    if (!nextStageReserve.canReserve()) {
-        blocked = true;
-    } else {
-        const ForwardLineData *line_in = getInput();
+    ThreadID tid = getScheduledThread();
+    DPRINTF(Fetch, "Scheduled Thread: %d\n", tid);
+
+    assert(insts_out.isBubble());
+    if (tid != InvalidThreadID) {
+        Fetch2ThreadInfo &fetch_info = fetchInfo[tid];
+
+        const ForwardLineData *line_in = getInput(tid);
 
         unsigned int output_index = 0;
 
@@ -288,7 +298,7 @@ Fetch2::evaluate()
          * for faulting lines */
         while (line_in &&
             (line_in->isFault() ||
-                inputIndex < line_in->lineWidth) && /* More input */
+                fetch_info.inputIndex < line_in->lineWidth) && /* More input */
             output_index < outputWidth && /* More output to fill */
             prediction.isBubble() /* No predicted branch */)
         {
@@ -298,26 +308,26 @@ Fetch2::evaluate()
             /* Discard line due to prediction sequence number being wrong but
              * without the streamSeqNum number having changed */
             bool discard_line =
-                expectedStreamSeqNum == line_in->id.streamSeqNum &&
-                predictionSeqNum != line_in->id.predictionSeqNum;
+                fetch_info.expectedStreamSeqNum == line_in->id.streamSeqNum &&
+                fetch_info.predictionSeqNum != line_in->id.predictionSeqNum;
 
             /* Set the PC if the stream changes.  Setting havePC to false in
              *  a previous cycle handles all other change of flow of control
              *  issues */
-            bool set_pc = lastStreamSeqNum != line_in->id.streamSeqNum;
+            bool set_pc = fetch_info.lastStreamSeqNum != line_in->id.streamSeqNum;
 
-            if (!discard_line && (!havePC || set_pc)) {
+            if (!discard_line && (!fetch_info.havePC || set_pc)) {
                 /* Set the inputIndex to be the MachInst-aligned offset
                  *  from lineBaseAddr of the new PC value */
-                inputIndex =
+                fetch_info.inputIndex =
                     (line_in->pc.instAddr() & BaseCPU::PCMask) -
                     line_in->lineBaseAddr;
                 DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x"
                     " lineBaseAddr: 0x%x lineWidth: 0x%x\n",
-                    line_in->pc, inputIndex, line_in->lineBaseAddr,
+                    line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
                     line_in->lineWidth);
-                pc = line_in->pc;
-                havePC = true;
+                fetch_info.pc = line_in->pc;
+                fetch_info.havePC = true;
                 decoder->reset();
             }
 
@@ -330,7 +340,8 @@ Fetch2::evaluate()
                  *  stream */
                 DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)"
                     " due to predictionSeqNum mismatch (expected: %d)\n",
-                    line_in->id, inputIndex, predictionSeqNum);
+                    line_in->id, fetch_info.inputIndex,
+                    fetch_info.predictionSeqNum);
             } else if (line_in->isFault()) {
                 /* Pack a fault as a MinorDynInst with ->fault set */
 
@@ -339,13 +350,13 @@ Fetch2::evaluate()
                 dyn_inst = new MinorDynInst(line_in->id);
 
                 /* Fetch and prediction sequence numbers originate here */
-                dyn_inst->id.fetchSeqNum = fetchSeqNum;
-                dyn_inst->id.predictionSeqNum = predictionSeqNum;
+                dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
+                dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
                 /* To complete the set, test that exec sequence number has
                  *  not been set */
                 assert(dyn_inst->id.execSeqNum == 0);
 
-                dyn_inst->pc = pc;
+                dyn_inst->pc = fetch_info.pc;
 
                 /* Pack a faulting instruction but allow other
                  *  instructions to be generated. (Fetch2 makes no
@@ -361,13 +372,14 @@ Fetch2::evaluate()
                  *  assign */
                 inst_word = TheISA::gtoh(
                     *(reinterpret_cast<TheISA::MachInst *>
-                    (line + inputIndex)));
+                    (line + fetch_info.inputIndex)));
 
                 if (!decoder->instReady()) {
-                    decoder->moreBytes(pc,
-                        line_in->lineBaseAddr + inputIndex, inst_word);
-                    DPRINTF(Fetch, "Offering MachInst to decoder"
-                        " addr: 0x%x\n", line_in->lineBaseAddr + inputIndex);
+                    decoder->moreBytes(fetch_info.pc,
+                        line_in->lineBaseAddr + fetch_info.inputIndex,
+                        inst_word);
+                    DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
+                            line_in->lineBaseAddr + fetch_info.inputIndex);
                 }
 
                 /* Maybe make the above a loop to accomodate ISAs with
@@ -379,8 +391,8 @@ Fetch2::evaluate()
                     dyn_inst = new MinorDynInst(line_in->id);
 
                     /* Fetch and prediction sequence numbers originate here */
-                    dyn_inst->id.fetchSeqNum = fetchSeqNum;
-                    dyn_inst->id.predictionSeqNum = predictionSeqNum;
+                    dyn_inst->id.fetchSeqNum = fetch_info.fetchSeqNum;
+                    dyn_inst->id.predictionSeqNum = fetch_info.predictionSeqNum;
                     /* To complete the set, test that exec sequence number
                      *  has not been set */
                     assert(dyn_inst->id.execSeqNum == 0);
@@ -388,17 +400,19 @@ Fetch2::evaluate()
                     /* Note that the decoder can update the given PC.
                      *  Remember not to assign it until *after* calling
                      *  decode */
-                    StaticInstPtr decoded_inst = decoder->decode(pc);
+                    StaticInstPtr decoded_inst = decoder->decode(fetch_info.pc);
                     dyn_inst->staticInst = decoded_inst;
 
-                    dyn_inst->pc = pc;
+                    dyn_inst->pc = fetch_info.pc;
+                    DPRINTF(Fetch, "decoder inst %s\n", *dyn_inst);
+
 
                     DPRINTF(Fetch, "Instruction extracted from line %s"
                         " lineWidth: %d output_index: %d inputIndex: %d"
                         " pc: %s inst: %s\n",
                         line_in->id,
-                        line_in->lineWidth, output_index, inputIndex,
-                        pc, *dyn_inst);
+                        line_in->lineWidth, output_index, fetch_info.inputIndex,
+                        fetch_info.pc, *dyn_inst);
 
 #if THE_ISA == X86_ISA || THE_ISA == ARM_ISA
                     /* In SE mode, it's possible to branch to a microop when
@@ -415,12 +429,12 @@ Fetch2::evaluate()
                      * the case that, after a branch, the first un-advanced PC
                      * may be pointing to a microop other than 0.  Once
                      * advanced, however, the microop number *must* be 0 */
-                    pc.upc(0);
-                    pc.nupc(1);
+                    fetch_info.pc.upc(0);
+                    fetch_info.pc.nupc(1);
 #endif
 
                     /* Advance PC for the next instruction */
-                    TheISA::advancePC(pc, decoded_inst);
+                    TheISA::advancePC(fetch_info.pc, decoded_inst);
 
                     /* Predict any branches and issue a branch if
                      *  necessary */
@@ -432,22 +446,23 @@ Fetch2::evaluate()
                 /* Step on the pointer into the line if there's no
                  *  complete instruction waiting */
                 if (decoder->needMoreBytes()) {
-                    inputIndex += sizeof(TheISA::MachInst);
+                    fetch_info.inputIndex += sizeof(TheISA::MachInst);
 
                 DPRINTF(Fetch, "Updated inputIndex value PC: %s"
                     " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n",
-                    line_in->pc, inputIndex, line_in->lineBaseAddr,
+                    line_in->pc, fetch_info.inputIndex, line_in->lineBaseAddr,
                     line_in->lineWidth);
                 }
             }
 
             if (dyn_inst) {
                 /* Step to next sequence number */
-                fetchSeqNum++;
+                fetch_info.fetchSeqNum++;
 
                 /* Correctly size the output before writing */
-                if (output_index == 0)
+                if (output_index == 0) {
                     insts_out.resize(outputWidth);
+                }
                 /* Pack the generated dynamic instruction into the output */
                 insts_out.insts[output_index] = dyn_inst;
                 output_index++;
@@ -463,7 +478,7 @@ Fetch2::evaluate()
 
             /* Remember the streamSeqNum of this line so we can tell when
              *  we change stream */
-            lastStreamSeqNum = line_in->id.streamSeqNum;
+            fetch_info.lastStreamSeqNum = line_in->id.streamSeqNum;
 
             /* Asked to discard line or there was a branch or fault */
             if (!prediction.isBubble() || /* The remains of a
@@ -471,33 +486,35 @@ Fetch2::evaluate()
                 line_in->isFault() /* A line which is just a fault */)
             {
                 DPRINTF(Fetch, "Discarding all input on branch/fault\n");
-                dumpAllInput();
-                havePC = false;
+                dumpAllInput(tid);
+                fetch_info.havePC = false;
                 line_in = NULL;
             } else if (discard_line) {
                 /* Just discard one line, one's behind it may have new
                  *  stream sequence numbers.  There's a DPRINTF above
                  *  for this event */
-                popInput();
-                havePC = false;
+                popInput(tid);
+                fetch_info.havePC = false;
                 line_in = NULL;
-            } else if (inputIndex == line_in->lineWidth) {
+            } else if (fetch_info.inputIndex == line_in->lineWidth) {
                 /* Got to end of a line, pop the line but keep PC
                  *  in case this is a line-wrapping inst. */
-                popInput();
+                popInput(tid);
                 line_in = NULL;
             }
 
             if (!line_in && processMoreThanOneInput) {
                 DPRINTF(Fetch, "Wrapping\n");
-                line_in = getInput();
+                line_in = getInput(tid);
             }
         }
 
         /* The rest of the output (if any) should already have been packed
          *  with bubble instructions by insts_out's initialisation */
     }
-
+    if (tid == InvalidThreadID) {
+        assert(insts_out.isBubble());
+    }
     /** Reserve a slot in the next stage and output data */
     *predictionOut.inputWire = prediction;
 
@@ -506,24 +523,66 @@ Fetch2::evaluate()
     if (!insts_out.isBubble()) {
         /* Note activity of following buffer */
         cpu.activityRecorder->activity();
-        nextStageReserve.reserve();
+        insts_out.threadId = tid;
+        nextStageReserve[tid].reserve();
     }
 
     /* If we still have input to process and somewhere to put it,
      *  mark stage as active */
-    if (getInput() && nextStageReserve.canReserve())
-        cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
+    for (ThreadID i = 0; i < cpu.numThreads; i++)
+    {
+        if (getInput(i) && nextStageReserve[i].canReserve()) {
+            cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId);
+            break;
+        }
+    }
 
     /* Make sure the input (if any left) is pushed */
-    inputBuffer.pushTail();
+    if (!inp.outputWire->isBubble())
+        inputBuffer[inp.outputWire->id.threadId].pushTail();
+}
+
+inline ThreadID
+Fetch2::getScheduledThread()
+{
+    /* Select thread via policy. */
+    std::vector<ThreadID> priority_list;
+
+    switch (cpu.threadPolicy) {
+      case Enums::SingleThreaded:
+        priority_list.push_back(0);
+        break;
+      case Enums::RoundRobin:
+        priority_list = cpu.roundRobinPriority(threadPriority);
+        break;
+      case Enums::Random:
+        priority_list = cpu.randomPriority();
+        break;
+      default:
+        panic("Unknown fetch policy");
+    }
+
+    for (auto tid : priority_list) {
+        if (cpu.getContext(tid)->status() == ThreadContext::Active &&
+            getInput(tid) && !fetchInfo[tid].blocked) {
+            threadPriority = tid;
+            return tid;
+        }
+    }
+
+    return InvalidThreadID;
 }
 
 bool
 Fetch2::isDrained()
 {
-    return inputBuffer.empty() &&
-        (*inp.outputWire).isBubble() &&
-        (*predictionOut.inputWire).isBubble();
+    for (const auto &buffer : inputBuffer) {
+        if (!buffer.empty())
+            return false;
+    }
+
+    return (*inp.outputWire).isBubble() &&
+           (*predictionOut.inputWire).isBubble();
 }
 
 void
@@ -531,14 +590,14 @@ Fetch2::minorTrace() const
 {
     std::ostringstream data;
 
-    if (blocked)
+    if (fetchInfo[0].blocked)
         data << 'B';
     else
         (*out.inputWire).reportData(data);
 
     MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n",
-        inputIndex, havePC, predictionSeqNum, data.str());
-    inputBuffer.minorTrace();
+        fetchInfo[0].inputIndex, fetchInfo[0].havePC, fetchInfo[0].predictionSeqNum, data.str());
+    inputBuffer[0].minorTrace();
 }
 
 }
index 2fc38b377f59512d7683d820459fda8b148e2a14..33c683b82cf112ec399c8fd36bbaca14dea10040 100644 (file)
@@ -78,7 +78,7 @@ class Fetch2 : public Named
     Latch<ForwardInstData>::Input out;
 
     /** Interface to reserve space in the next stage */
-    Reservable &nextStageReserve;
+    std::vector<InputBuffer<ForwardInstData>> &nextStageReserve;
 
     /** Width of output of this stage/input of next in instructions */
     unsigned int outputWidth;
@@ -92,61 +92,90 @@ class Fetch2 : public Named
 
   public:
     /* Public so that Pipeline can pass it to Fetch1 */
-    InputBuffer<ForwardLineData> inputBuffer;
+    std::vector<InputBuffer<ForwardLineData>> inputBuffer;
 
   protected:
     /** Data members after this line are cycle-to-cycle state */
 
-    /** Index into an incompletely processed input line that instructions
-     *  are to be extracted from */
-    unsigned int inputIndex;
-
-    /** Remembered program counter value.  Between contiguous lines, this
-     *  is just updated with advancePC.  For lines following changes of
-     *  stream, a new PC must be loaded and havePC be set.
-     *  havePC is needed to accomodate instructions which span across
-     *  lines meaning that Fetch2 and the decoder need to remember a PC
-     *  value and a partially-offered instruction from the previous line */
-    TheISA::PCState pc;
-
-    /** PC is currently valid.  Initially false, gets set to true when a
-     *  change-of-stream line is received and false again when lines are
-     *  discarded for any reason */
-    bool havePC;
-
-    /** Stream sequence number of the last seen line used to identify changes
-     *  of instruction stream */
-    InstSeqNum lastStreamSeqNum;
-
-    /** Fetch2 is the source of fetch sequence numbers.  These represent the
-     *  sequence that instructions were extracted from fetched lines. */
-    InstSeqNum fetchSeqNum;
-
-    /** Stream sequence number remembered from last time the predictionSeqNum
-     *  changed.  Lines should only be discarded when their predictionSeqNums
-     *  disagree with Fetch2::predictionSeqNum *and* they are from the same
-     *  stream that bore that prediction number */
-    InstSeqNum expectedStreamSeqNum;
-
-    /** Fetch2 is the source of prediction sequence numbers.  These represent
-     *  predicted changes of control flow sources from branch prediction in
-     *  Fetch2. */
-    InstSeqNum predictionSeqNum;
-
-    /** Blocked indication for report */
-    bool blocked;
+    struct Fetch2ThreadInfo {
+
+        /** Default constructor */
+        Fetch2ThreadInfo() :
+            inputIndex(0),
+            pc(TheISA::PCState(0)),
+            havePC(false),
+            lastStreamSeqNum(InstId::firstStreamSeqNum),
+            fetchSeqNum(InstId::firstFetchSeqNum),
+            expectedStreamSeqNum(InstId::firstStreamSeqNum),
+            predictionSeqNum(InstId::firstPredictionSeqNum),
+            blocked(false)
+        { }
+
+        Fetch2ThreadInfo(const Fetch2ThreadInfo& other) :
+            inputIndex(other.inputIndex),
+            pc(other.pc),
+            havePC(other.havePC),
+            lastStreamSeqNum(other.lastStreamSeqNum), fetchSeqNum(other.fetchSeqNum),
+            expectedStreamSeqNum(other.expectedStreamSeqNum),
+            predictionSeqNum(other.predictionSeqNum),
+            blocked(other.blocked)
+        { }
+
+        /** Index into an incompletely processed input line that instructions
+         *  are to be extracted from */
+        unsigned int inputIndex;
+
+
+        /** Remembered program counter value.  Between contiguous lines, this
+         *  is just updated with advancePC.  For lines following changes of
+         *  stream, a new PC must be loaded and havePC be set.
+         *  havePC is needed to accommodate instructions which span across
+         *  lines meaning that Fetch2 and the decoder need to remember a PC
+         *  value and a partially-offered instruction from the previous line */
+        TheISA::PCState pc;
+
+        /** PC is currently valid.  Initially false, gets set to true when a
+         *  change-of-stream line is received and false again when lines are
+         *  discarded for any reason */
+        bool havePC;
+
+        /** Stream sequence number of the last seen line used to identify
+         *  changes of instruction stream */
+        InstSeqNum lastStreamSeqNum;
+
+        /** Fetch2 is the source of fetch sequence numbers.  These represent the
+         *  sequence that instructions were extracted from fetched lines. */
+        InstSeqNum fetchSeqNum;
+
+        /** Stream sequence number remembered from last time the
+         *  predictionSeqNum changed.  Lines should only be discarded when their
+         *  predictionSeqNums disagree with Fetch2::predictionSeqNum *and* they
+         *  are from the same stream that bore that prediction number */
+        InstSeqNum expectedStreamSeqNum;
+
+        /** Fetch2 is the source of prediction sequence numbers.  These
+         *  represent predicted changes of control flow sources from branch
+         *  prediction in Fetch2. */
+        InstSeqNum predictionSeqNum;
+
+        /** Blocked indication for report */
+        bool blocked;
+    };
+
+    std::vector<Fetch2ThreadInfo> fetchInfo;
+    ThreadID threadPriority;
 
   protected:
     /** Get a piece of data to work on from the inputBuffer, or 0 if there
      *  is no data. */
-    const ForwardLineData *getInput();
+    const ForwardLineData *getInput(ThreadID tid);
 
     /** Pop an element off the input buffer, if there are any */
-    void popInput();
+    void popInput(ThreadID tid);
 
     /** Dump the whole contents of the input buffer.  Useful after a
      *  prediction changes control flow */
-    void dumpAllInput();
+    void dumpAllInput(ThreadID tid);
 
     /** Update local branch prediction structures from feedback from
      *  Execute. */
@@ -157,6 +186,10 @@ class Fetch2 : public Named
      *  carries the prediction to Fetch1 */
     void predictBranch(MinorDynInstPtr inst, BranchData &branch);
 
+    /** Use the current threading policy to determine the next thread to
+     *  fetch from. */
+    ThreadID getScheduledThread();
+
   public:
     Fetch2(const std::string &name,
         MinorCPU &cpu_,
@@ -165,7 +198,7 @@ class Fetch2 : public Named
         Latch<BranchData>::Output branchInp_,
         Latch<BranchData>::Input predictionOut_,
         Latch<ForwardInstData>::Input out_,
-        Reservable &next_stage_input_buffer);
+        std::vector<InputBuffer<ForwardInstData>> &next_stage_input_buffer);
 
   public:
     /** Pass on input/buffer data to the output if you can */
index b5c0bc97415231865c9f67faaadf3faa05a856ab..5995a52c26064bde811e94125b610d1b568953ed 100644 (file)
@@ -216,13 +216,14 @@ operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state)
 void
 LSQ::clearMemBarrier(MinorDynInstPtr inst)
 {
-    bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier;
+    bool is_last_barrier =
+        inst->id.execSeqNum >= lastMemBarrier[inst->id.threadId];
 
     DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n",
         (is_last_barrier ? "last" : "a"), *inst);
 
     if (is_last_barrier)
-        lastMemBarrier = 0;
+        lastMemBarrier[inst->id.threadId] = 0;
 }
 
 void
@@ -676,7 +677,8 @@ LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
     while (ret == NoAddrRangeCoverage && i != slots.rend()) {
         LSQRequestPtr slot = *i;
 
-        if (slot->packet) {
+        if (slot->packet &&
+            slot->inst->id.threadId == request->inst->id.threadId) {
             AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
 
             if (coverage != NoAddrRangeCoverage) {
@@ -1042,8 +1044,9 @@ LSQ::tryToSendToTransfers(LSQRequestPtr request)
             request->issuedToMemory = true;
         }
 
-        if (tryToSend(request))
+        if (tryToSend(request)) {
             moveFromRequestsToTransfers(request);
+        }
     } else {
         request->setState(LSQRequest::Complete);
         moveFromRequestsToTransfers(request);
@@ -1145,6 +1148,9 @@ LSQ::tryToSend(LSQRequestPtr request)
         }
     }
 
+    if (ret)
+        threadSnoop(request);
+
     return ret;
 }
 
@@ -1293,7 +1299,7 @@ LSQ::LSQ(std::string name_, std::string dcache_port_name_,
     cpu(cpu_),
     execute(execute_),
     dcachePort(dcache_port_name_, *this, cpu_),
-    lastMemBarrier(0),
+    lastMemBarrier(cpu.numThreads, 0),
     state(MemoryRunning),
     inMemorySystemLimit(in_memory_system_limit),
     lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)),
@@ -1526,7 +1532,7 @@ LSQ::minorTrace() const
     MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d"
         " lastMemBarrier=%d\n",
         state, numAccessesInDTLB, numAccessesInMemorySystem,
-        numStoresInTransfers, lastMemBarrier);
+        numStoresInTransfers, lastMemBarrier[0]);
     requests.minorTrace();
     transfers.minorTrace();
     storeBuffer.minorTrace();
@@ -1565,12 +1571,12 @@ void
 LSQ::issuedMemBarrierInst(MinorDynInstPtr inst)
 {
     assert(inst->isInst() && inst->staticInst->isMemBarrier());
-    assert(inst->id.execSeqNum > lastMemBarrier);
+    assert(inst->id.execSeqNum > lastMemBarrier[inst->id.threadId]);
 
     /* Remember the barrier.  We only have a notion of one
      *  barrier so this may result in some mem refs being
      *  delayed if they are between barriers */
-    lastMemBarrier = inst->id.execSeqNum;
+    lastMemBarrier[inst->id.threadId] = inst->id.execSeqNum;
 }
 
 void
@@ -1616,10 +1622,40 @@ LSQ::recvTimingSnoopReq(PacketPtr pkt)
     /* LLSC operations in Minor can't be speculative and are executed from
      * the head of the requests queue.  We shouldn't need to do more than
      * this action on snoops. */
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu.wakeup(tid);
+        }
+    }
 
-    /* THREAD */
     if (pkt->isInvalidate() || pkt->isWrite()) {
-        TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask);
+        for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+            TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
+                                      cacheBlockMask);
+        }
+    }
+}
+
+void
+LSQ::threadSnoop(LSQRequestPtr request)
+{
+    /* LLSC operations in Minor can't be speculative and are executed from
+     * the head of the requests queue.  We shouldn't need to do more than
+     * this action on snoops. */
+    ThreadID req_tid = request->inst->id.threadId;
+    PacketPtr pkt = request->packet;
+
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        if (tid != req_tid) {
+            if (cpu.getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                cpu.wakeup(tid);
+            }
+
+            if (pkt->isInvalidate() || pkt->isWrite()) {
+                TheISA::handleLockedSnoop(cpu.getContext(tid), pkt,
+                                          cacheBlockMask);
+            }
+        }
     }
 }
 
index 09fb30d03b7c7e9c7506c7be70337cc2b37982c5..e0b72177c382afdc62ea3eef58ccaeaaf0ab78e1 100644 (file)
@@ -537,7 +537,7 @@ class LSQ : public Named
     /** Most recent execSeqNum of a memory barrier instruction or
      *  0 if there are no in-flight barriers.  Useful as a
      *  dependency for early-issued memory operations */
-    InstSeqNum lastMemBarrier;
+    std::vector<InstSeqNum> lastMemBarrier;
 
   public:
     /** Retry state of last issued memory transfer */
@@ -640,6 +640,9 @@ class LSQ : public Named
     /** Can a request be sent to the memory system */
     bool canSendToMemorySystem();
 
+    /** Snoop other threads monitors on memory system accesses */
+    void threadSnoop(LSQRequestPtr request);
+
   public:
     LSQ(std::string name_, std::string dcache_port_name_,
         MinorCPU &cpu_, Execute &execute_,
@@ -691,7 +694,8 @@ class LSQ : public Named
     void issuedMemBarrierInst(MinorDynInstPtr inst);
 
     /** Get the execSeqNum of the last issued memory barrier */
-    InstSeqNum getLastMemBarrier() const { return lastMemBarrier; }
+    InstSeqNum getLastMemBarrier(ThreadID thread_id) const
+    { return lastMemBarrier[thread_id]; }
 
     /** Is there nothing left in the LSQ */
     bool isDrained();
index 447f9c0e77c5b37c248fc94eb8d4058d64b6023c..208c9c9f4a03f01590bea80cb9401eb975883301 100644 (file)
@@ -71,9 +71,6 @@ operator <<(std::ostream &os, BranchData::Reason reason)
       case BranchData::SuspendThread:
         os << "SuspendThread";
         break;
-      case BranchData::WakeupFetch:
-        os << "WakeupFetch";
-        break;
       case BranchData::HaltFetch:
         os << "HaltFetch";
         break;
@@ -102,7 +99,6 @@ BranchData::isStreamChange(const BranchData::Reason reason)
       case BadlyPredictedBranch:
       case SuspendThread:
       case Interrupt:
-      case WakeupFetch:
       case HaltFetch:
         ret = true;
         break;
@@ -123,7 +119,6 @@ BranchData::isBranch(const BranchData::Reason reason)
       case CorrectlyPredictedBranch:
       case SuspendThread:
       case Interrupt:
-      case WakeupFetch:
       case HaltFetch:
         ret = false;
         break;
@@ -228,8 +223,8 @@ ForwardLineData::reportData(std::ostream &os) const
         os << id;
 }
 
-ForwardInstData::ForwardInstData(unsigned int width) :
-    numInsts(width)
+ForwardInstData::ForwardInstData(unsigned int width, ThreadID tid) :
+    numInsts(width), threadId(tid)
 {
     bubbleFill();
 }
index 4468cb89e50b0b5ce78fa1058a4f87d48f5c8054..e514be2f949f1d695907328be855cb75afaca86e 100644 (file)
@@ -91,8 +91,6 @@ class BranchData /* : public ReportIF, public BubbleIF */
          * count it as stream changing itself and expect pc to be the PC
          * of the next instruction */
         SuspendThread,
-        /* Wakeup fetching from Halted */
-        WakeupFetch,
         /* Branch from an interrupt (no instruction) */
         Interrupt,
         /* Stop fetching in anticipation of of draining */
@@ -112,6 +110,9 @@ class BranchData /* : public ReportIF, public BubbleIF */
     /** Explanation for this branch */
     Reason reason;
 
+    /** ThreadID associated with branch */
+    ThreadID threadId;
+
     /** Sequence number of new stream/prediction to be adopted */
     InstSeqNum newStreamSeqNum;
     InstSeqNum newPredictionSeqNum;
@@ -124,18 +125,20 @@ class BranchData /* : public ReportIF, public BubbleIF */
 
   public:
     BranchData() :
-        reason(NoBranch), newStreamSeqNum(0),
+        reason(NoBranch), threadId(InvalidThreadID), newStreamSeqNum(0),
         newPredictionSeqNum(0), target(TheISA::PCState(0)),
         inst(MinorDynInst::bubble())
     { }
 
     BranchData(
         Reason reason_,
+        ThreadID thread_id,
         InstSeqNum new_stream_seq_num,
         InstSeqNum new_prediction_seq_num,
         TheISA::PCState target,
         MinorDynInstPtr inst_) :
         reason(reason_),
+        threadId(thread_id),
         newStreamSeqNum(new_stream_seq_num),
         newPredictionSeqNum(new_prediction_seq_num),
         target(target),
@@ -258,8 +261,12 @@ class ForwardInstData /* : public ReportIF, public BubbleIF */
     /** The number of insts slots that can be expected to be valid insts */
     unsigned int numInsts;
 
+    /** Thread associated with these instructions */
+    ThreadID threadId;
+
   public:
-    explicit ForwardInstData(unsigned int width = 0);
+    explicit ForwardInstData(unsigned int width = 0,
+                             ThreadID tid = InvalidThreadID);
 
     ForwardInstData(const ForwardInstData &src);
 
index 39b7f31f9b3f6e8209b69d601c31ad02a5a50b7d..8c04e3949ae1946ec09c14c51cd2025506222fad 100644 (file)
@@ -187,9 +187,9 @@ Pipeline::getDataPort()
 }
 
 void
-Pipeline::wakeupFetch()
+Pipeline::wakeupFetch(ThreadID tid)
 {
-    execute.wakeupFetch();
+    fetch1.wakeupFetch(tid);
 }
 
 bool
@@ -212,6 +212,11 @@ void
 Pipeline::drainResume()
 {
     DPRINTF(Drain, "Drain resume\n");
+
+    for (ThreadID tid = 0; tid < cpu.numThreads; tid++) {
+        fetch1.wakeupFetch(tid);
+    }
+
     execute.drainResume();
 }
 
index 2e1aa9921c0f92ee3c3961c978e98b2f61583aea..9b6ca0d32332143d427b37693fe30fcbf3ef00e8 100644 (file)
@@ -112,7 +112,7 @@ class Pipeline : public Ticked
   public:
     /** Wake up the Fetch unit.  This is needed on thread activation esp.
      *  after quiesce wakeup */
-    void wakeupFetch();
+    void wakeupFetch(ThreadID tid);
 
     /** Try to drain the CPU */
     bool drain();
index 44fe2fcae8732a62781dff675808c20e5fb87bce..8f20c5ff9329139d1c4910ff5ab7d45d46c54b8a 100644 (file)
@@ -261,7 +261,7 @@ quiesceSkip(ThreadContext *tc)
 
     EndQuiesceEvent *quiesceEvent = tc->getQuiesceEvent();
 
-    Tick resume = curTick() + 1;
+    Tick resume = cpu->nextCycle() + 1;
 
     cpu->reschedule(quiesceEvent, resume, true);
 
index 52731d66f3e8113b5cc232b13bd77cc2d8f8924b..fd32a3ef35d675e4ba122f2e645f3522c02dc9c3 100644 (file)
@@ -115,9 +115,9 @@ macro predictionFrame: decoder=frame stripDir=vert dataElement=predictionSeqNum
 # name ::= ? alphanumeric name with dots ?
 # value ::= "(<char-except-">)*", <char-except-' '>* }
 
-Fi: fetch2.inputBuffer inputBuffer decoder=lines
-Di: decode.inputBuffer inputBuffer decoder=insts hideId=E
-Ei: execute.inputBuffer inputBuffer stripDir=horiz decoder=insts border=mid
+Fi: fetch2.inputBuffer0 inputBuffer decoder=lines
+Di: decode.inputBuffer0 inputBuffer decoder=insts hideId=E
+Ei: execute.inputBuffer0 inputBuffer stripDir=horiz decoder=insts border=mid
 F1: fetch1 streamFrame blankStrips=11 name="Fetch1"
 fe: fetch1 decoder=lines border=thin name="Line"
 F2: fetch2 predictionFrame blankStrips=11 name="Fetch2"
@@ -146,9 +146,9 @@ f3: execute.fu.3 fu shorten=2 name=Div
 f4: execute.fu.4 fu shorten=2 name=Float
 f5: execute.fu.5 fu shorten=2 name=Mem
 f6: execute.fu.6 fu shorten=2 name=Misc
-iq: execute.inFlightInsts fifo decoder=insts name="inFlightInsts"
-im: execute.inFUMemInsts fifo decoder=insts name="inFU..."
-sc: execute.scoreboard name="scoreboard" decoder=indexedCounts \
+iq: execute.inFlightInsts0 fifo decoder=insts name="inFlightInsts"
+im: execute.inFUMemInsts0 fifo decoder=insts name="inFU..."
+sc: execute.scoreboard0 name="scoreboard" decoder=indexedCounts \
     dataElement=busy border=mid name="scoreboard" strips=38 stripelems=3
 sa: activity dataElement=stages activity name="Stage activity"
 ac: activity dataElement=activity decoder=counts border=mid name="Activity"