gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model
[gem5.git] / src / gpu-compute / exec_stage.cc
index 0640083f837d7613566ca0b6366fc6bc107f8160..2dece180b140b088fb82d352b90896b937b8d973 100644 (file)
 
 #include "gpu-compute/exec_stage.hh"
 
+#include <sstream>
+
+#include "base/trace.hh"
+#include "debug/GPUSched.hh"
 #include "gpu-compute/compute_unit.hh"
+#include "gpu-compute/vector_register_file.hh"
 #include "gpu-compute/wavefront.hh"
 
-ExecStage::ExecStage(const ComputeUnitParams *p) : numSIMDs(p->num_SIMDs),
-    numMemUnits(p->num_global_mem_pipes + p->num_shared_mem_pipes),
-    vectorAluInstAvail(nullptr), glbMemInstAvail(nullptr),
-    shrMemInstAvail(nullptr), lastTimeInstExecuted(false),
+ExecStage::ExecStage(const ComputeUnitParams *p) : lastTimeInstExecuted(false),
     thisTimeInstExecuted(false), instrExecuted (false),
     executionResourcesUsed(0)
 {
@@ -53,37 +55,18 @@ ExecStage::init(ComputeUnit *cu)
     computeUnit = cu;
     _name = computeUnit->name() + ".ExecStage";
     dispatchList = &computeUnit->dispatchList;
-    vectorAluInstAvail = &(computeUnit->vectorAluInstAvail);
-    glbMemInstAvail= &(computeUnit->glbMemInstAvail);
-    shrMemInstAvail= &(computeUnit->shrMemInstAvail);
     idle_dur = 0;
 }
 
 void
 ExecStage::collectStatistics(enum STAT_STATUS stage, int unitId) {
     if (stage == IdleExec) {
-        // count cycles of no vector ALU instruction executed
-        // even if one was the oldest in a WV of that vector SIMD unit
-        if (computeUnit->isVecAlu(unitId) && vectorAluInstAvail->at(unitId)) {
-            numCyclesWithNoInstrTypeIssued[unitId]++;
-        }
-
-        // count cycles of no global memory (vector) instruction executed
-        // even if one was the oldest in a WV of that vector SIMD unit
-        if (computeUnit->isGlbMem(unitId) && *glbMemInstAvail > 0) {
-            numCyclesWithNoInstrTypeIssued[unitId]++;
-            (*glbMemInstAvail)--;
-        }
-
-        // count cycles of no shared memory (vector) instruction executed
-        // even if one was the oldest in a WV of that vector SIMD unit
-        if (computeUnit->isShrMem(unitId) && *shrMemInstAvail > 0) {
-            numCyclesWithNoInstrTypeIssued[unitId]++;
-            (*shrMemInstAvail)--;
-        }
+        // count cycles when no instruction to a specific execution resource
+        // is executed
+        numCyclesWithNoInstrTypeIssued[unitId]++;
     } else if (stage == BusyExec) {
-        // count the number of cycles an instruction to a specific unit
-        // was issued
+        // count the number of cycles an instruction to a specific execution
+        // resource type was issued
         numCyclesWithInstrTypeIssued[unitId]++;
         thisTimeInstExecuted = true;
         instrExecuted = true;
@@ -102,14 +85,13 @@ ExecStage::collectStatistics(enum STAT_STATUS stage, int unitId) {
         }
 
         lastTimeInstExecuted = thisTimeInstExecuted;
-        // track the number of cycles we either issued one vector instruction
-        // or issued no instructions at all
+        // track the number of cycles we either issued at least one
+        // instruction or issued no instructions at all
         if (instrExecuted) {
             numCyclesWithInstrIssued++;
         } else {
             numCyclesWithNoIssue++;
         }
-
         spc.sample(executionResourcesUsed);
     }
 }
@@ -122,25 +104,86 @@ ExecStage::initStatistics()
     thisTimeInstExecuted = false;
 }
 
+std::string  // returns the printable name of dispatch status value i
+ExecStage::dispStatusToStr(int i)
+{
+    std::string s("INVALID");  // kept when i matches no case below
+    switch (i) {
+    case EMPTY:
+        s = "EMPTY";
+        break;
+    case SKIP:
+        s = "SKIP";
+        break;
+    case EXREADY:
+        s = "EXREADY";
+        break;
+    }  // no default case: unmatched values fall through as "INVALID"
+    return s;
+}
+
+void  // builds a text dump of the dispatch list; prints it via DPRINTF
+ExecStage::dumpDispList()
+{
+    std::stringstream ss;  // one output line per execution unit
+    bool empty = true;  // stays true only if every entry is EMPTY
+    for (int i = 0; i < computeUnit->numExeUnits(); i++) {
+        DISPATCH_STATUS s = dispatchList->at(i).second;
+        ss << i << ": " << dispStatusToStr(s);
+        if (s != EMPTY) {  // non-EMPTY entries also get wavefront details
+            empty = false;
+            Wavefront *w = dispatchList->at(i).first;
+            ss << " SIMD[" << w->simdId << "] WV[" << w->wfDynId << "]: ";
+            ss << (w->instructionBuffer.front())->seqNum() << ": ";
+            ss << (w->instructionBuffer.front())->disassemble();
+        }  // NOTE(review): front() assumes a non-empty buffer - confirm
+        ss << "\n";
+    }
+    if (!empty) {  // print nothing when all entries are EMPTY
+        DPRINTF(GPUSched, "Dispatch List:\n%s", ss.str());
+    }
+}
+
 void
 ExecStage::exec()
 {
     initStatistics();
-
-    for (int unitId = 0; unitId < (numSIMDs + numMemUnits); ++unitId) {
-         // if dispatch list for this execution resource is empty,
-         // skip this execution resource this cycle
-         if (dispatchList->at(unitId).second == EMPTY) {
-             collectStatistics(IdleExec, unitId);
-             continue;
-         }
-
-         collectStatistics(BusyExec, unitId);
-         // execute an instruction for the WF
-         dispatchList->at(unitId).first->exec();
-         // clear the dispatch list entry
-         dispatchList->at(unitId).second = EMPTY;
-         dispatchList->at(unitId).first = (Wavefront*)nullptr;
+    if (Debug::GPUSched) {
+        dumpDispList();
+    }
+    for (int unitId = 0; unitId < computeUnit->numExeUnits(); ++unitId) {
+        DISPATCH_STATUS s = dispatchList->at(unitId).second;
+        switch (s) {
+        case EMPTY:
+            // Do not execute if empty, waiting for VRF reads,
+            // or LM tied to GM waiting for VRF reads
+            collectStatistics(IdleExec, unitId);
+            break;
+        case EXREADY:
+        {
+            collectStatistics(BusyExec, unitId);
+            Wavefront *w = dispatchList->at(unitId).first;
+            DPRINTF(GPUSched, "Exec[%d]: SIMD[%d] WV[%d]: %s\n",
+                    unitId, w->simdId, w->wfDynId,
+                    (w->instructionBuffer.front())->disassemble());
+            DPRINTF(GPUSched, "dispatchList[%d] EXREADY->EMPTY\n", unitId);
+            dispatchList->at(unitId).first->exec();
+            (computeUnit->scheduleStage).deleteFromSch(w);
+            dispatchList->at(unitId).second = EMPTY;
+            dispatchList->at(unitId).first->freeResources();
+            dispatchList->at(unitId).first = nullptr;
+            break;
+        }
+        case SKIP:
+            collectStatistics(BusyExec, unitId);
+            DPRINTF(GPUSched, "dispatchList[%d] SKIP->EMPTY\n", unitId);
+            dispatchList->at(unitId).second = EMPTY;
+            dispatchList->at(unitId).first->freeResources();
+            dispatchList->at(unitId).first = nullptr;
+            break;
+        default:
+            panic("Unknown dispatch status in exec()\n");
+        }
     }
 
     collectStatistics(PostExec, 0);
@@ -165,7 +208,7 @@ ExecStage::regStats()
         ;
 
     spc
-        .init(0, numSIMDs + numMemUnits, 1)
+        .init(0, computeUnit->numExeUnits(), 1)
         .name(name() + ".spc")
         .desc("Execution units active per cycle (Exec unit=SIMD,MemPipe)")
         ;
@@ -177,25 +220,36 @@ ExecStage::regStats()
         ;
 
     numCyclesWithInstrTypeIssued
-        .init(numSIMDs + numMemUnits)
-        .name(name() + ".num_cycles_with_instrtype_issue")
-        .desc("Number of cycles at least one instruction of specific type "
-              "issued")
+        .init(computeUnit->numExeUnits())
+        .name(name() + ".num_cycles_issue_exec_rsrc")
+        .desc("Number of cycles at least one instruction issued to "
+              "execution resource type")
         ;
 
     numCyclesWithNoInstrTypeIssued
-        .init(numSIMDs + numMemUnits)
-       .name(name() + ".num_cycles_with_instr_type_no_issue")
-       .desc("Number of cycles no instruction of specific type issued")
+        .init(computeUnit->numExeUnits())
+       .name(name() + ".num_cycles_no_issue_exec_rsrc")
+       .desc("Number of clks no instructions issued to execution "
+             "resource type")
        ;
 
-    for (int i = 0; i < numSIMDs; ++i) {
-        numCyclesWithInstrTypeIssued.subname(i, csprintf("ALU%d",i));
-        numCyclesWithNoInstrTypeIssued.subname(i, csprintf("ALU%d",i));
+    int c = 0;
+    for (int i = 0; i < computeUnit->numVectorALUs; i++,c++) {
+        std::string s = "VectorALU" + std::to_string(i);
+        numCyclesWithNoInstrTypeIssued.subname(c, s);
+        numCyclesWithInstrTypeIssued.subname(c, s);
+    }
+    for (int i = 0; i < computeUnit->numScalarALUs; i++,c++) {
+        std::string s = "ScalarALU" + std::to_string(i);
+        numCyclesWithNoInstrTypeIssued.subname(c, s);
+        numCyclesWithInstrTypeIssued.subname(c, s);
     }
+    numCyclesWithNoInstrTypeIssued.subname(c, "VectorMemPipe");
+    numCyclesWithInstrTypeIssued.subname(c++, "VectorMemPipe");
+
+    numCyclesWithNoInstrTypeIssued.subname(c, "SharedMemPipe");
+    numCyclesWithInstrTypeIssued.subname(c++, "SharedMemPipe");
 
-    numCyclesWithInstrTypeIssued.subname(numSIMDs, csprintf("GM"));
-    numCyclesWithNoInstrTypeIssued.subname(numSIMDs, csprintf("GM"));
-    numCyclesWithInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM"));
-    numCyclesWithNoInstrTypeIssued.subname(numSIMDs + 1, csprintf("LM"));
+    numCyclesWithNoInstrTypeIssued.subname(c, "ScalarMemPipe");
+    numCyclesWithInstrTypeIssued.subname(c++, "ScalarMemPipe");
 }