gpu: fix bugs with MemFence, Flat Instrs and Resource utilization
author John Kalamatianos <john.kalamatianos@amd.com>
Thu, 18 Feb 2016 15:42:03 +0000 (10:42 -0500)
committer John Kalamatianos <john.kalamatianos@amd.com>
Thu, 18 Feb 2016 15:42:03 +0000 (10:42 -0500)
The "Both" memory fence (OT_BOTH_MEMFENCE) is now flagged as a Global Memory
operation only, to avoid oversubscribing resources.
Flat instructions now also check whether the Shared Memory resources are busy,
to avoid oversubscribing resources.
All WaitClass resources now use cycles (not ticks) to register the number
of pipe stages between Scoreboard and Execute, to be consistent with the
instruction scheduling logic, which always used clock cycles.

src/gpu-compute/code_enums.hh
src/gpu-compute/compute_unit.cc
src/gpu-compute/wavefront.cc

index 126cf6c50cca68117263c97b2404e240c314c57f..6cd9bfe26c213623f772c3248e6fcfb0fc270ef0 100644 (file)
@@ -84,6 +84,7 @@
                     ||(a)==Enums::OT_PRIVATE_ATOMIC \
                     ||(a)==Enums::OT_SPILL_ATOMIC \
                     ||(a)==Enums::OT_READONLY_ATOMIC \
+                    ||(a)==Enums::OT_BOTH_MEMFENCE \
                     ||(a)==Enums::OT_FLAT_ATOMIC)
 
 #define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
@@ -93,8 +94,7 @@
                     ||(a)==Enums::OT_BOTH_MEMFENCE)
 
 #define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
-                    ||(a)==Enums::OT_SHARED_MEMFENCE \
-                    ||(a)==Enums::OT_BOTH_MEMFENCE)
+                    ||(a)==Enums::OT_SHARED_MEMFENCE)
 
 #define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
 
index d3622007a913d90e2542fba327cfdf7aea08c24b..63f3e8fb5107c09e62778a1d75202871ea0e18c7 100644 (file)
@@ -587,8 +587,8 @@ void
 ComputeUnit::init()
 {
     // Initialize CU Bus models
-    glbMemToVrfBus.init(&shader->tick_cnt, 1);
-    locMemToVrfBus.init(&shader->tick_cnt, 1);
+    glbMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1));
+    locMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1));
     nextGlbMemBus = 0;
     nextLocMemBus = 0;
     fatal_if(numGlbMemUnits > 1,
@@ -596,7 +596,7 @@ ComputeUnit::init()
     vrfToGlobalMemPipeBus.resize(numGlbMemUnits);
     for (int j = 0; j < numGlbMemUnits; ++j) {
         vrfToGlobalMemPipeBus[j] = WaitClass();
-        vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, 1);
+        vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1));
     }
 
     fatal_if(numLocMemUnits > 1,
@@ -604,7 +604,7 @@ ComputeUnit::init()
     vrfToLocalMemPipeBus.resize(numLocMemUnits);
     for (int j = 0; j < numLocMemUnits; ++j) {
         vrfToLocalMemPipeBus[j] = WaitClass();
-        vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, 1);
+        vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1));
     }
     vectorRegsReserved.resize(numSIMDs, 0);
     aluPipe.resize(numSIMDs);
@@ -612,12 +612,12 @@ ComputeUnit::init()
 
     for (int i = 0; i < numSIMDs + numLocMemUnits + numGlbMemUnits; ++i) {
         wfWait[i] = WaitClass();
-        wfWait[i].init(&shader->tick_cnt, 1);
+        wfWait[i].init(&shader->tick_cnt, shader->ticks(1));
     }
 
     for (int i = 0; i < numSIMDs; ++i) {
         aluPipe[i] = WaitClass();
-        aluPipe[i].init(&shader->tick_cnt, 1);
+        aluPipe[i].init(&shader->tick_cnt, shader->ticks(1));
     }
 
     // Setup space for call args
index 0aa033db1bed3a8e03faa190b69e1c7fa1115afa..ed13b22c7da1435b9448d9cfff456c1c95548af0 100644 (file)
@@ -162,7 +162,6 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
 
     if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
         IS_OT_ATOMIC_GM(ii->opType())) {
-
         return true;
     }
 
@@ -349,7 +348,7 @@ Wavefront::ready(itype_e type)
     }
     bool locMemBusRdy = false;
     bool locMemIssueRdy = false;
-    if (type == I_SHARED) {
+    if (type == I_SHARED || type == I_FLAT) {
         for (int j=0; j < computeUnit->numLocMemUnits; ++j) {
             if (computeUnit->vrfToLocalMemPipeBus[j].prerdy())
                 locMemBusRdy = true;
@@ -598,7 +597,6 @@ Wavefront::ready(itype_e type)
 
     DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id,
             simdId, wfSlotId, ii->disassemble());
-
     return 1;
 }