gpu-compute: Wavefront refactoring

author Alexandru Dutu <alexandru.dutu@amd.com>
Fri, 16 Sep 2016 16:26:52 +0000 (12:26 -0400)
committer Alexandru Dutu <alexandru.dutu@amd.com>
Fri, 16 Sep 2016 16:26:52 +0000 (12:26 -0400)
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py

index f7768054134d4748002ff65e35bfe644ab88c9ed..22832658f9f05caa0a15f6ffa3f5f1b0430d5b0e 100755 (executable)
--- a/src/arch/hsail/gen.py
+++ b/src/arch/hsail/gen.py
@@ -233,7 +233,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
  
      typedef Base::DestCType DestCType;
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
  
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
@@ -254,7 +254,7 @@ $class_name::execute(GPUDynInstPtr gpuDynInst)
      typedef Base::DestCType DestCType;
      typedef Base::SrcCType  SrcCType;
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
  
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
@@ -275,7 +275,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
  {
      Wavefront *w = gpuDynInst->wavefront();
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
  
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
@@ -310,7 +310,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
      typedef typename Base::Src1CType Src1T;
      typedef typename Base::Src2CType Src2T;
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
  
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
@@ -344,7 +344,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
      typedef CType Src0T;
      typedef typename Base::Src1CType Src1T;
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
  
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
@@ -371,7 +371,7 @@ $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
  {
      Wavefront *w = gpuDynInst->wavefront();
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
              CType dest_val;
@@ -399,7 +399,7 @@ $class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
  {
      Wavefront *w = gpuDynInst->wavefront();
  
-    const VectorMask &mask = w->get_pred();
+    const VectorMask &mask = w->getPred();
  
      for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
          if (mask[lane]) {
@@ -745,17 +745,17 @@ def gen_special(brig_opcode, expr, dest_type='U32'):
  
      gen(brig_opcode, None, expr, base_class)
  
-gen_special('WorkItemId', 'w->workitemid[src0][lane]')
+gen_special('WorkItemId', 'w->workItemId[src0][lane]')
  gen_special('WorkItemAbsId',
-    'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])')
-gen_special('WorkGroupId', 'w->workgroupid[src0]')
-gen_special('WorkGroupSize', 'w->workgroupsz[src0]')
-gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]')
-gen_special('GridSize', 'w->gridsz[src0]')
+    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
+gen_special('WorkGroupId', 'w->workGroupId[src0]')
+gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
+gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
+gen_special('GridSize', 'w->gridSz[src0]')
  gen_special('GridGroups',
-    'divCeil(w->gridsz[src0],w->workgroupsz[src0])')
+    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
  gen_special('LaneId', 'lane')
-gen_special('WaveId', 'w->dynwaveid')
+gen_special('WaveId', 'w->dynWaveId')
  gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
  
  # gen_special('CU'', ')
diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh

index 90609c365b4af1a1ebc17e78937bae043e8f159a..48e022ff75d0360647343dbae9afcf8bc36f1a97 100644 (file)
--- a/src/arch/hsail/insts/decl.hh
+++ b/src/arch/hsail/insts/decl.hh
@@ -960,7 +960,7 @@ namespace HsailISA
                  gpuDynInst->simdId = w->simdId;
                  gpuDynInst->wfSlotId = w->wfSlotId;
                  gpuDynInst->wfDynId = w->wfDynId;
-                gpuDynInst->kern_id = w->kern_id;
+                gpuDynInst->kern_id = w->kernId;
                  gpuDynInst->cu_id = w->computeUnit->cu_id;
  
                  gpuDynInst->memoryOrder =
@@ -971,10 +971,10 @@ namespace HsailISA
                  GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
                  gmp->getGMReqFIFO().push(gpuDynInst);
  
-                w->wr_gm_reqs_in_pipe--;
-                w->rd_gm_reqs_in_pipe--;
-                w->mem_reqs_in_pipe--;
-                w->outstanding_reqs++;
+                w->wrGmReqsInPipe--;
+                w->rdGmReqsInPipe--;
+                w->memReqsInPipe--;
+                w->outstandingReqs++;
              } else if (o_type == Enums::OT_SHARED_MEMFENCE) {
                  // no-op
              } else {
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc

index 004054524cf9a7afdcfc6fe0685766ffe7175c22..f1662430acabfb1c648c388c1e7a75b78a9f31eb 100644 (file)
--- a/src/arch/hsail/insts/main.cc
+++ b/src/arch/hsail/insts/main.cc
@@ -131,12 +131,12 @@ namespace HsailISA
      {
          Wavefront *w = gpuDynInst->wavefront();
  
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
  
          // mask off completed work-items
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
-                w->init_mask[lane] = 0;
+                w->initMask[lane] = 0;
              }
  
          }
@@ -149,14 +149,14 @@ namespace HsailISA
          }
  
          // if all work-items have completed, then wave-front is done
-        if (w->init_mask.none()) {
+        if (w->initMask.none()) {
              w->status = Wavefront::S_STOPPED;
  
              int32_t refCount = w->computeUnit->getLds().
-                                   decreaseRefCounter(w->dispatchid, w->wg_id);
+                                   decreaseRefCounter(w->dispatchId, w->wgId);
  
              DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
-                            w->computeUnit->cu_id, w->wg_id, refCount);
+                            w->computeUnit->cu_id, w->wgId, refCount);
  
              // free the vector registers of the completed wavefront
              w->computeUnit->vectorRegsReserved[w->simdId] -=
@@ -201,8 +201,8 @@ namespace HsailISA
      {
          Wavefront *w = gpuDynInst->wavefront();
  
-        assert(w->barrier_cnt == w->old_barrier_cnt);
-        w->barrier_cnt = w->old_barrier_cnt + 1;
+        assert(w->barrierCnt == w->oldBarrierCnt);
+        w->barrierCnt = w->oldBarrierCnt + 1;
          w->stalledAtBarrier = true;
      }
  } // namespace HsailISA
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh

index 8329c6e8a22f070164c9063b365c2327d8d71220..3042e2201b941656fcff341c0bc374c4c35bb3a7 100644 (file)
--- a/src/arch/hsail/insts/mem_impl.hh
+++ b/src/arch/hsail/insts/mem_impl.hh
@@ -59,7 +59,7 @@ namespace HsailISA
          Wavefront *w = gpuDynInst->wavefront();
  
          typedef typename DestDataType::CType CType M5_VAR_USED;
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          std::vector<Addr> addr_vec;
          addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
          this->addr.calcVector(w, addr_vec);
@@ -159,7 +159,7 @@ namespace HsailISA
          Wavefront *w = gpuDynInst->wavefront();
  
          typedef typename MemDataType::CType MemCType;
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
  
          // Kernarg references are handled uniquely for now (no Memory Request
          // is used), so special-case them up front.  Someday we should
@@ -230,7 +230,7 @@ namespace HsailISA
          m->simdId = w->simdId;
          m->wfSlotId = w->wfSlotId;
          m->wfDynId = w->wfDynId;
-        m->kern_id = w->kern_id;
+        m->kern_id = w->kernId;
          m->cu_id = w->computeUnit->cu_id;
          m->latency.init(&w->computeUnit->shader->tick_cnt);
  
@@ -261,8 +261,8 @@ namespace HsailISA
              }
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_rd_gm++;
-            w->rd_gm_reqs_in_pipe--;
+            w->outstandingReqsRdGm++;
+            w->rdGmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_SPILL:
@@ -281,14 +281,14 @@ namespace HsailISA
                          m->addr[lane] = m->addr[lane] * w->spillWidth +
                                          lane * sizeof(MemCType) + w->spillBase;
  
-                        w->last_addr[lane] = m->addr[lane];
+                        w->lastAddr[lane] = m->addr[lane];
                      }
                  }
              }
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_rd_gm++;
-            w->rd_gm_reqs_in_pipe--;
+            w->outstandingReqsRdGm++;
+            w->rdGmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_GROUP:
@@ -296,8 +296,8 @@ namespace HsailISA
              m->pipeId = LDSMEM_PIPE;
              m->latency.set(w->computeUnit->shader->ticks(24));
              w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
-            w->outstanding_reqs_rd_lm++;
-            w->rd_lm_reqs_in_pipe--;
+            w->outstandingReqsRdLm++;
+            w->rdLmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_READONLY:
@@ -313,8 +313,8 @@ namespace HsailISA
              }
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_rd_gm++;
-            w->rd_gm_reqs_in_pipe--;
+            w->outstandingReqsRdGm++;
+            w->rdGmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_PRIVATE:
@@ -332,8 +332,8 @@ namespace HsailISA
                  }
              }
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_rd_gm++;
-            w->rd_gm_reqs_in_pipe--;
+            w->outstandingReqsRdGm++;
+            w->rdGmReqsInPipe--;
              break;
  
            default:
@@ -341,8 +341,8 @@ namespace HsailISA
                    m->addr[0]);
          }
  
-        w->outstanding_reqs++;
-        w->mem_reqs_in_pipe--;
+        w->outstandingReqs++;
+        w->memReqsInPipe--;
      }
  
      template<typename OperationType, typename SrcDataType,
@@ -355,7 +355,7 @@ namespace HsailISA
  
          typedef typename OperationType::CType CType;
  
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
  
          // arg references are handled uniquely for now (no Memory Request
          // is used), so special-case them up front.  Someday we should
@@ -419,7 +419,7 @@ namespace HsailISA
          m->simdId = w->simdId;
          m->wfSlotId = w->wfSlotId;
          m->wfDynId = w->wfDynId;
-        m->kern_id = w->kern_id;
+        m->kern_id = w->kernId;
          m->cu_id = w->computeUnit->cu_id;
          m->latency.init(&w->computeUnit->shader->tick_cnt);
  
@@ -448,8 +448,8 @@ namespace HsailISA
              }
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_wr_gm++;
-            w->wr_gm_reqs_in_pipe--;
+            w->outstandingReqsWrGm++;
+            w->wrGmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_SPILL:
@@ -469,8 +469,8 @@ namespace HsailISA
              }
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_wr_gm++;
-            w->wr_gm_reqs_in_pipe--;
+            w->outstandingReqsWrGm++;
+            w->wrGmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_GROUP:
@@ -478,8 +478,8 @@ namespace HsailISA
              m->pipeId = LDSMEM_PIPE;
              m->latency.set(w->computeUnit->shader->ticks(24));
              w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
-            w->outstanding_reqs_wr_lm++;
-            w->wr_lm_reqs_in_pipe--;
+            w->outstandingReqsWrLm++;
+            w->wrLmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_PRIVATE:
@@ -497,16 +497,16 @@ namespace HsailISA
              }
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_wr_gm++;
-            w->wr_gm_reqs_in_pipe--;
+            w->outstandingReqsWrGm++;
+            w->wrGmReqsInPipe--;
              break;
  
            default:
              fatal("Store to unsupported segment %d\n", this->segment);
          }
  
-        w->outstanding_reqs++;
-        w->mem_reqs_in_pipe--;
+        w->outstandingReqs++;
+        w->memReqsInPipe--;
      }
  
      template<typename OperationType, typename SrcDataType,
@@ -596,7 +596,7 @@ namespace HsailISA
          m->simdId = w->simdId;
          m->wfSlotId = w->wfSlotId;
          m->wfDynId = w->wfDynId;
-        m->kern_id = w->kern_id;
+        m->kern_id = w->kernId;
          m->cu_id = w->computeUnit->cu_id;
          m->latency.init(&w->computeUnit->shader->tick_cnt);
  
@@ -607,10 +607,10 @@ namespace HsailISA
              m->pipeId = GLBMEM_PIPE;
  
              w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-            w->outstanding_reqs_wr_gm++;
-            w->wr_gm_reqs_in_pipe--;
-            w->outstanding_reqs_rd_gm++;
-            w->rd_gm_reqs_in_pipe--;
+            w->outstandingReqsWrGm++;
+            w->wrGmReqsInPipe--;
+            w->outstandingReqsRdGm++;
+            w->rdGmReqsInPipe--;
              break;
  
            case Brig::BRIG_SEGMENT_GROUP:
@@ -618,10 +618,10 @@ namespace HsailISA
              m->pipeId = LDSMEM_PIPE;
              m->latency.set(w->computeUnit->shader->ticks(24));
              w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
-            w->outstanding_reqs_wr_lm++;
-            w->wr_lm_reqs_in_pipe--;
-            w->outstanding_reqs_rd_lm++;
-            w->rd_lm_reqs_in_pipe--;
+            w->outstandingReqsWrLm++;
+            w->wrLmReqsInPipe--;
+            w->outstandingReqsRdLm++;
+            w->rdLmReqsInPipe--;
              break;
  
            default:
@@ -629,8 +629,8 @@ namespace HsailISA
                    this->segment);
          }
  
-        w->outstanding_reqs++;
-        w->mem_reqs_in_pipe--;
+        w->outstandingReqs++;
+        w->memReqsInPipe--;
      }
  
      const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc

index 56ca8047c50760272505f0f84fcf9b865edca502..2bfc5aaad4a51f7dc60c4165aa2b8c0902ed6aee 100644 (file)
--- a/src/arch/hsail/insts/pseudo_inst.cc
+++ b/src/arch/hsail/insts/pseudo_inst.cc
@@ -79,7 +79,7 @@ namespace HsailISA
      void
      Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
  
          int op = 0;
          bool got_op = false;
@@ -181,7 +181,7 @@ namespace HsailISA
      Call::MagicPrintLane(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
                  int src_val1 = src1.get<int>(w, lane, 1);
@@ -204,7 +204,7 @@ namespace HsailISA
      Call::MagicPrintLane64(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
                  int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
@@ -227,7 +227,7 @@ namespace HsailISA
      Call::MagicPrintWF32(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          std::string res_str;
          res_str = csprintf("krl_prt (%s)\n", disassemble());
  
@@ -265,7 +265,7 @@ namespace HsailISA
      Call::MagicPrintWF32ID(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          std::string res_str;
          int src_val3 = -1;
          res_str = csprintf("krl_prt (%s)\n", disassemble());
@@ -307,7 +307,7 @@ namespace HsailISA
      Call::MagicPrintWF64(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          std::string res_str;
          res_str = csprintf("krl_prt (%s)\n", disassemble());
  
@@ -345,7 +345,7 @@ namespace HsailISA
      Call::MagicPrintWFID64(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          std::string res_str;
          int src_val3 = -1;
          res_str = csprintf("krl_prt (%s)\n", disassemble());
@@ -387,7 +387,7 @@ namespace HsailISA
      Call::MagicPrintWFFloat(Wavefront *w)
      {
      #if TRACING_ON
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          std::string res_str;
          res_str = csprintf("krl_prt (%s)\n", disassemble());
  
@@ -425,7 +425,7 @@ namespace HsailISA
          res_str = csprintf("Breakpoint encountered for wavefront %i\n",
                             w->wfSlotId);
  
-        res_str += csprintf("  Kern ID: %i\n", w->kern_id);
+        res_str += csprintf("  Kern ID: %i\n", w->kernId);
          res_str += csprintf("  Phase ID: %i\n", w->simdId);
          res_str += csprintf("  Executing on CU #%i\n", w->computeUnit->cu_id);
          res_str += csprintf("  Exec mask: ");
@@ -455,7 +455,7 @@ namespace HsailISA
      void
      Call::MagicPrefixSum(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int res = 0;
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
@@ -474,7 +474,7 @@ namespace HsailISA
          //   The reduction instruction takes up to 64 inputs (one from
          //   each thread in a WF) and sums them. It returns the sum to
          //   each thread in the WF.
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int res = 0;
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
@@ -494,7 +494,7 @@ namespace HsailISA
      void
      Call::MagicMaskLower(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int res = 0;
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
@@ -519,7 +519,7 @@ namespace HsailISA
      void
      Call::MagicMaskUpper(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int res = 0;
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
@@ -544,42 +544,42 @@ namespace HsailISA
      void
      Call::MagicJoinWFBar(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int max_cnt = 0;
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
-                w->bar_cnt[lane]++;
+                w->barCnt[lane]++;
  
-                if (w->bar_cnt[lane] > max_cnt) {
-                    max_cnt = w->bar_cnt[lane];
+                if (w->barCnt[lane] > max_cnt) {
+                    max_cnt = w->barCnt[lane];
                  }
              }
          }
  
-        if (max_cnt > w->max_bar_cnt) {
-            w->max_bar_cnt = max_cnt;
+        if (max_cnt > w->maxBarCnt) {
+            w->maxBarCnt = max_cnt;
          }
      }
  
      void
      Call::MagicWaitWFBar(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int max_cnt = 0;
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
-                w->bar_cnt[lane]--;
+                w->barCnt[lane]--;
              }
  
-            if (w->bar_cnt[lane] > max_cnt) {
-                max_cnt = w->bar_cnt[lane];
+            if (w->barCnt[lane] > max_cnt) {
+                max_cnt = w->barCnt[lane];
              }
          }
  
-        if (max_cnt < w->max_bar_cnt) {
-            w->max_bar_cnt = max_cnt;
+        if (max_cnt < w->maxBarCnt) {
+            w->maxBarCnt = max_cnt;
          }
  
          w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
@@ -591,7 +591,7 @@ namespace HsailISA
      void
      Call::MagicPanic(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
              if (mask[lane]) {
@@ -648,12 +648,12 @@ namespace HsailISA
          m->pipeId = GLBMEM_PIPE;
          m->latency.set(w->computeUnit->shader->ticks(64));
          w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-        w->outstanding_reqs_wr_gm++;
-        w->wr_gm_reqs_in_pipe--;
-        w->outstanding_reqs_rd_gm++;
-        w->rd_gm_reqs_in_pipe--;
-        w->outstanding_reqs++;
-        w->mem_reqs_in_pipe--;
+        w->outstandingReqsWrGm++;
+        w->wrGmReqsInPipe--;
+        w->outstandingReqsRdGm++;
+        w->rdGmReqsInPipe--;
+        w->outstandingReqs++;
+        w->memReqsInPipe--;
      }
  
      void
@@ -687,12 +687,12 @@ namespace HsailISA
          m->pipeId = GLBMEM_PIPE;
          m->latency.set(w->computeUnit->shader->ticks(64));
          w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-        w->outstanding_reqs_wr_gm++;
-        w->wr_gm_reqs_in_pipe--;
-        w->outstanding_reqs_rd_gm++;
-        w->rd_gm_reqs_in_pipe--;
-        w->outstanding_reqs++;
-        w->mem_reqs_in_pipe--;
+        w->outstandingReqsWrGm++;
+        w->wrGmReqsInPipe--;
+        w->outstandingReqsRdGm++;
+        w->rdGmReqsInPipe--;
+        w->outstandingReqs++;
+        w->memReqsInPipe--;
      }
  
      void
@@ -725,16 +725,16 @@ namespace HsailISA
          m->pipeId = GLBMEM_PIPE;
          m->latency.set(w->computeUnit->shader->ticks(1));
          w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
-        w->outstanding_reqs_rd_gm++;
-        w->rd_gm_reqs_in_pipe--;
-        w->outstanding_reqs++;
-        w->mem_reqs_in_pipe--;
+        w->outstandingReqsRdGm++;
+        w->rdGmReqsInPipe--;
+        w->outstandingReqs++;
+        w->memReqsInPipe--;
      }
  
      void
      Call::MagicXactCasLd(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int src_val1 = 0;
  
          for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
@@ -756,7 +756,7 @@ namespace HsailISA
      void
      Call::MagicMostSigThread(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          unsigned mst = true;
  
          for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
@@ -770,7 +770,7 @@ namespace HsailISA
      void
      Call::MagicMostSigBroadcast(Wavefront *w)
      {
-        const VectorMask &mask = w->get_pred();
+        const VectorMask &mask = w->getPred();
          int res = 0;
          bool got_res = false;
  
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc

index 83e2414db7a278c918b53650cc6fdabaa4590e5c..32fa3bd6a4bcd3d69c10c1947b80a56309f6fd6c 100644 (file)
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -178,13 +178,13 @@ ComputeUnit::FillKernelState(Wavefront *w, NDRange *ndr)
  {
      w->resizeRegFiles(ndr->q.cRegCount, ndr->q.sRegCount, ndr->q.dRegCount);
  
-    w->workgroupsz[0] = ndr->q.wgSize[0];
-    w->workgroupsz[1] = ndr->q.wgSize[1];
-    w->workgroupsz[2] = ndr->q.wgSize[2];
-    w->wg_sz = w->workgroupsz[0] * w->workgroupsz[1] * w->workgroupsz[2];
-    w->gridsz[0] = ndr->q.gdSize[0];
-    w->gridsz[1] = ndr->q.gdSize[1];
-    w->gridsz[2] = ndr->q.gdSize[2];
+    w->workGroupSz[0] = ndr->q.wgSize[0];
+    w->workGroupSz[1] = ndr->q.wgSize[1];
+    w->workGroupSz[2] = ndr->q.wgSize[2];
+    w->wgSz = w->workGroupSz[0] * w->workGroupSz[1] * w->workGroupSz[2];
+    w->gridSz[0] = ndr->q.gdSize[0];
+    w->gridSz[1] = ndr->q.gdSize[1];
+    w->gridSz[2] = ndr->q.gdSize[2];
      w->kernelArgs = ndr->q.args;
      w->privSizePerItem = ndr->q.privMemPerItem;
      w->spillSizePerItem = ndr->q.spillMemPerItem;
@@ -236,29 +236,29 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
              init_mask[k] = 1;
      }
  
-    w->kern_id = ndr->dispatchId;
-    w->dynwaveid = cnt;
-    w->init_mask = init_mask.to_ullong();
+    w->kernId = ndr->dispatchId;
+    w->dynWaveId = cnt;
+    w->initMask = init_mask.to_ullong();
  
      for (int k = 0; k < wfSize(); ++k) {
-        w->workitemid[0][k] = (k+cnt*wfSize()) % trueWgSize[0];
-        w->workitemid[1][k] =
+        w->workItemId[0][k] = (k+cnt*wfSize()) % trueWgSize[0];
+        w->workItemId[1][k] =
              ((k + cnt * wfSize()) / trueWgSize[0]) % trueWgSize[1];
-        w->workitemid[2][k] =
+        w->workItemId[2][k] =
              (k + cnt * wfSize()) / (trueWgSize[0] * trueWgSize[1]);
  
-        w->workitemFlatId[k] = w->workitemid[2][k] * trueWgSize[0] *
-            trueWgSize[1] + w->workitemid[1][k] * trueWgSize[0] +
-            w->workitemid[0][k];
+        w->workItemFlatId[k] = w->workItemId[2][k] * trueWgSize[0] *
+            trueWgSize[1] + w->workItemId[1][k] * trueWgSize[0] +
+            w->workItemId[0][k];
      }
  
-    w->barrier_slots = divCeil(trueWgSizeTotal, wfSize());
+    w->barrierSlots = divCeil(trueWgSizeTotal, wfSize());
  
-    w->bar_cnt.resize(wfSize(), 0);
+    w->barCnt.resize(wfSize(), 0);
  
-    w->max_bar_cnt = 0;
-    w->old_barrier_cnt = 0;
-    w->barrier_cnt = 0;
+    w->maxBarCnt = 0;
+    w->oldBarrierCnt = 0;
+    w->barrierCnt = 0;
  
      w->privBase = ndr->q.privMemStart;
      ndr->q.privMemStart += ndr->q.privMemPerItem * wfSize();
@@ -269,22 +269,22 @@ ComputeUnit::StartWF(Wavefront *w, int trueWgSize[], int trueWgSizeTotal,
      w->pushToReconvergenceStack(0, UINT32_MAX, init_mask.to_ulong());
  
      // WG state
-    w->wg_id = ndr->globalWgId;
-    w->dispatchid = ndr->dispatchId;
-    w->workgroupid[0] = w->wg_id % ndr->numWg[0];
-    w->workgroupid[1] = (w->wg_id / ndr->numWg[0]) % ndr->numWg[1];
-    w->workgroupid[2] = w->wg_id / (ndr->numWg[0] * ndr->numWg[1]);
+    w->wgId = ndr->globalWgId;
+    w->dispatchId = ndr->dispatchId;
+    w->workGroupId[0] = w->wgId % ndr->numWg[0];
+    w->workGroupId[1] = (w->wgId / ndr->numWg[0]) % ndr->numWg[1];
+    w->workGroupId[2] = w->wgId / (ndr->numWg[0] * ndr->numWg[1]);
  
-    w->barrier_id = barrier_id;
+    w->barrierId = barrier_id;
      w->stalledAtBarrier = false;
  
      // set the wavefront context to have a pointer to this section of the LDS
      w->ldsChunk = ldsChunk;
  
      int32_t refCount M5_VAR_USED =
-                    lds.increaseRefCounter(w->dispatchid, w->wg_id);
+                    lds.increaseRefCounter(w->dispatchId, w->wgId);
      DPRINTF(GPUDisp, "CU%d: increase ref ctr wg[%d] to [%d]\n",
-                    cu_id, w->wg_id, refCount);
+                    cu_id, w->wgId, refCount);
  
      w->instructionBuffer.clear();
  
@@ -468,15 +468,15 @@ ComputeUnit::AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
                  DPRINTF(GPUSync, "Checking WF[%d][%d]\n", i_simd, i_wf);
  
                  DPRINTF(GPUSync, "wf->barrier_id = %d, _barrier_id = %d\n",
-                        w->barrier_id, _barrier_id);
+                        w->barrierId, _barrier_id);
  
                  DPRINTF(GPUSync, "wf->barrier_cnt %d, bcnt = %d\n",
-                        w->barrier_cnt, bcnt);
+                        w->barrierCnt, bcnt);
              }
  
              if (w->status == Wavefront::S_RUNNING &&
-                w->barrier_id == _barrier_id && w->barrier_cnt == bcnt &&
-                !w->outstanding_reqs) {
+                w->barrierId == _barrier_id && w->barrierCnt == bcnt &&
+                !w->outstandingReqs) {
                  ++ccnt;
  
                  DPRINTF(GPUSync, "WF[%d][%d] at barrier, increment ccnt to "
@@ -646,17 +646,17 @@ ComputeUnit::DataPort::recvTimingResp(PacketPtr pkt)
          if (w->status == Wavefront::S_RETURNING) {
              DPRINTF(GPUDisp, "CU%d: WF[%d][%d][wv=%d]: WG id completed %d\n",
                      computeUnit->cu_id, w->simdId, w->wfSlotId,
-                    w->wfDynId, w->kern_id);
+                    w->wfDynId, w->kernId);
  
              computeUnit->shader->dispatcher->notifyWgCompl(w);
              w->status = Wavefront::S_STOPPED;
          } else {
-            w->outstanding_reqs--;
+            w->outstandingReqs--;
          }
  
          DPRINTF(GPUSync, "CU%d: WF[%d][%d]: barrier_cnt = %d\n",
                  computeUnit->cu_id, gpuDynInst->simdId,
-                gpuDynInst->wfSlotId, w->barrier_cnt);
+                gpuDynInst->wfSlotId, w->barrierCnt);
  
          if (gpuDynInst->useContinuation) {
              assert(gpuDynInst->scope != Enums::MEMORY_SCOPE_NONE);
diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc

index d1d011c0dc66478c2541ddfa3e8cbca62b698c7e..adc1f51bd55973af502c4226082f0cd9a224f08b 100644 (file)
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -305,7 +305,7 @@ GpuDispatcher::exec()
  void
  GpuDispatcher::notifyWgCompl(Wavefront *w)
  {
-    int kern_id = w->kern_id;
+    int kern_id = w->kernId;
      DPRINTF(GPUDisp, "notify WgCompl %d\n",kern_id);
      assert(ndRangeMap[kern_id].dispatchId == kern_id);
      ndRangeMap[kern_id].numWgCompleted++;
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc

index 1f0a7d78e13ad625597c0e78910b1d0069da9a63..9104c400e97894e8c059d32f7ca59193c9783e30 100644 (file)
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -115,7 +115,7 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
  {
      // calculate the virtual address to fetch from the SQC
      Addr vaddr = wavefront->pc() + wavefront->instructionBuffer.size();
-    vaddr = wavefront->base_ptr +  vaddr * sizeof(GPUStaticInst*);
+    vaddr = wavefront->basePtr +  vaddr * sizeof(GPUStaticInst*);
  
      DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
              computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc

index a6a4d86dbe7c861a7751c5013870adcb7aa786cd..102905ec848cac24987c9d48b6c60887d13289b3 100644 (file)
--- a/src/gpu-compute/global_memory_pipeline.cc
+++ b/src/gpu-compute/global_memory_pipeline.cc
@@ -212,16 +212,16 @@ GlobalMemPipeline::doGmReturn(GPUDynInstPtr m)
      }
  
      // Decrement outstanding register count
-    computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1);
+    computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
  
      if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op) ||
          MO_H(m->m_op)) {
-        computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_gm, m->time,
+        computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time,
                                           -1);
      }
  
      if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
-        computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_gm, m->time,
+        computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time,
                                           -1);
      }
  
diff --git a/src/gpu-compute/local_memory_pipeline.cc b/src/gpu-compute/local_memory_pipeline.cc

index a970d8f9b7438dc4133581b4b8e19a28bbf7c92a..e2238bf4546281d25084b508a077bb63aec4d0b4 100644 (file)
--- a/src/gpu-compute/local_memory_pipeline.cc
+++ b/src/gpu-compute/local_memory_pipeline.cc
@@ -170,16 +170,16 @@ LocalMemPipeline::doSmReturn(GPUDynInstPtr m)
      }
  
      // Decrement outstanding request count
-    computeUnit->shader->ScheduleAdd(&w->outstanding_reqs, m->time, -1);
+    computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
  
      if (m->m_op == Enums::MO_ST || MO_A(m->m_op) || MO_ANR(m->m_op)
          || MO_H(m->m_op)) {
-        computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_wr_lm,
+        computeUnit->shader->ScheduleAdd(&w->outstandingReqsWrLm,
                                           m->time, -1);
      }
  
      if (m->m_op == Enums::MO_LD || MO_A(m->m_op) || MO_ANR(m->m_op)) {
-        computeUnit->shader->ScheduleAdd(&w->outstanding_reqs_rd_lm,
+        computeUnit->shader->ScheduleAdd(&w->outstandingReqsRdLm,
                                           m->time, -1);
      }
  
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc

index a20330082c6bd325618b5a058448e0d07d768b12..c73307ac4d7e7b946faf58f6e83a874c3370e9d7 100644 (file)
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -52,43 +52,43 @@ WavefrontParams::create()
  Wavefront::Wavefront(const Params *p)
    : SimObject(p), callArgMem(nullptr)
  {
-    last_trace = 0;
+    lastTrace = 0;
      simdId = p->simdId;
      wfSlotId = p->wf_slot_id;
      status = S_STOPPED;
      reservedVectorRegs = 0;
      startVgprIndex = 0;
-    outstanding_reqs = 0;
-    mem_reqs_in_pipe = 0;
-    outstanding_reqs_wr_gm = 0;
-    outstanding_reqs_wr_lm = 0;
-    outstanding_reqs_rd_gm = 0;
-    outstanding_reqs_rd_lm = 0;
-    rd_lm_reqs_in_pipe = 0;
-    rd_gm_reqs_in_pipe = 0;
-    wr_lm_reqs_in_pipe = 0;
-    wr_gm_reqs_in_pipe = 0;
-
-    barrier_cnt = 0;
-    old_barrier_cnt = 0;
+    outstandingReqs = 0;
+    memReqsInPipe = 0;
+    outstandingReqsWrGm = 0;
+    outstandingReqsWrLm = 0;
+    outstandingReqsRdGm = 0;
+    outstandingReqsRdLm = 0;
+    rdLmReqsInPipe = 0;
+    rdGmReqsInPipe = 0;
+    wrLmReqsInPipe = 0;
+    wrGmReqsInPipe = 0;
+
+    barrierCnt = 0;
+    oldBarrierCnt = 0;
      stalledAtBarrier = false;
  
-    mem_trace_busy = 0;
-    old_vgpr_tcnt = 0xffffffffffffffffll;
-    old_dgpr_tcnt = 0xffffffffffffffffll;
-    old_vgpr.resize(p->wfSize);
+    memTraceBusy = 0;
+    oldVgprTcnt = 0xffffffffffffffffll;
+    oldDgprTcnt = 0xffffffffffffffffll;
+    oldVgpr.resize(p->wfSize);
  
      pendingFetch = false;
      dropFetch = false;
      condRegState = new ConditionRegisterState();
      maxSpVgprs = 0;
      maxDpVgprs = 0;
-    last_addr.resize(p->wfSize);
-    workitemFlatId.resize(p->wfSize);
-    old_dgpr.resize(p->wfSize);
-    bar_cnt.resize(p->wfSize);
+    lastAddr.resize(p->wfSize);
+    workItemFlatId.resize(p->wfSize);
+    oldDgpr.resize(p->wfSize);
+    barCnt.resize(p->wfSize);
      for (int i = 0; i < 3; ++i) {
-        workitemid[i].resize(p->wfSize);
+        workItemId[i].resize(p->wfSize);
      }
  }
  
@@ -158,7 +158,7 @@ void
  Wavefront::start(uint64_t _wfDynId,uint64_t _base_ptr)
  {
      wfDynId = _wfDynId;
-    base_ptr = _base_ptr;
+    basePtr = _base_ptr;
      status = S_RUNNING;
  }
  
@@ -333,12 +333,12 @@ Wavefront::ready(itype_e type)
  
      // Is the wave waiting at a barrier
      if (stalledAtBarrier) {
-        if (!computeUnit->AllAtBarrier(barrier_id,barrier_cnt,
-                        computeUnit->getRefCounter(dispatchid, wg_id))) {
+        if (!computeUnit->AllAtBarrier(barrierId,barrierCnt,
+                        computeUnit->getRefCounter(dispatchId, wgId))) {
              // Are all threads at barrier?
              return 0;
          }
-        old_barrier_cnt = barrier_cnt;
+        oldBarrierCnt = barrierCnt;
          stalledAtBarrier = false;
      }
  
@@ -395,7 +395,7 @@ Wavefront::ready(itype_e type)
          }
  
          // Are there in pipe or outstanding memory requests?
-        if ((outstanding_reqs + mem_reqs_in_pipe) > 0) {
+        if ((outstandingReqs + memReqsInPipe) > 0) {
              return 0;
          }
  
@@ -416,7 +416,7 @@ Wavefront::ready(itype_e type)
          }
  
          // Are there in pipe or outstanding memory requests?
-        if ((outstanding_reqs + mem_reqs_in_pipe) > 0) {
+        if ((outstandingReqs + memReqsInPipe) > 0) {
              return 0;
          }
  
@@ -444,7 +444,7 @@ Wavefront::ready(itype_e type)
          // Here Global memory instruction
          if (IS_OT_READ_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType())) {
              // Are there in pipe or outstanding global memory write requests?
-            if ((outstanding_reqs_wr_gm + wr_gm_reqs_in_pipe) > 0) {
+            if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
                  return 0;
              }
          }
@@ -452,7 +452,7 @@ Wavefront::ready(itype_e type)
          if (IS_OT_WRITE_GM(ii->opType()) || IS_OT_ATOMIC_GM(ii->opType()) ||
              IS_OT_HIST_GM(ii->opType())) {
              // Are there in pipe or outstanding global memory read requests?
-            if ((outstanding_reqs_rd_gm + rd_gm_reqs_in_pipe) > 0)
+            if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0)
                  return 0;
          }
  
@@ -467,7 +467,7 @@ Wavefront::ready(itype_e type)
          }
  
          if (!computeUnit->globalMemoryPipe.
-            isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) {
+            isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
              // Can we insert a new request to the Global Mem Request FIFO?
              return 0;
          }
@@ -484,14 +484,14 @@ Wavefront::ready(itype_e type)
                 IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()))) {
          // Here for Shared memory instruction
          if (IS_OT_READ_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType())) {
-            if ((outstanding_reqs_wr_lm + wr_lm_reqs_in_pipe) > 0) {
+            if ((outstandingReqsWrLm + wrLmReqsInPipe) > 0) {
                  return 0;
              }
          }
  
          if (IS_OT_WRITE_LM(ii->opType()) || IS_OT_ATOMIC_LM(ii->opType()) ||
              IS_OT_HIST_LM(ii->opType())) {
-            if ((outstanding_reqs_rd_lm + rd_lm_reqs_in_pipe) > 0) {
+            if ((outstandingReqsRdLm + rdLmReqsInPipe) > 0) {
                  return 0;
              }
          }
@@ -506,7 +506,7 @@ Wavefront::ready(itype_e type)
          }
  
          if (!computeUnit->localMemoryPipe.
-            isLMReqFIFOWrRdy(rd_lm_reqs_in_pipe + wr_lm_reqs_in_pipe)) {
+            isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) {
              // Can we insert a new request to the LDS Request FIFO?
              return 0;
          }
@@ -523,14 +523,14 @@ Wavefront::ready(itype_e type)
                 IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()))) {
          // Here for Private memory instruction ------------------------    //
          if (IS_OT_READ_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType())) {
-            if ((outstanding_reqs_wr_gm + wr_gm_reqs_in_pipe) > 0) {
+            if ((outstandingReqsWrGm + wrGmReqsInPipe) > 0) {
                  return 0;
              }
          }
  
          if (IS_OT_WRITE_PM(ii->opType()) || IS_OT_ATOMIC_PM(ii->opType()) ||
              IS_OT_HIST_PM(ii->opType())) {
-            if ((outstanding_reqs_rd_gm + rd_gm_reqs_in_pipe) > 0) {
+            if ((outstandingReqsRdGm + rdGmReqsInPipe) > 0) {
                  return 0;
              }
          }
@@ -546,7 +546,7 @@ Wavefront::ready(itype_e type)
          }
  
          if (!computeUnit->globalMemoryPipe.
-            isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) {
+            isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
              // Can we insert a new request to the Global Mem Request FIFO?
              return 0;
          }
@@ -579,13 +579,13 @@ Wavefront::ready(itype_e type)
              return 0;
          }
          if (!computeUnit->globalMemoryPipe.
-            isGMReqFIFOWrRdy(rd_gm_reqs_in_pipe + wr_gm_reqs_in_pipe)) {
+            isGMReqFIFOWrRdy(rdGmReqsInPipe + wrGmReqsInPipe)) {
              // Can we insert a new request to the Global Mem Request FIFO?
              return 0;
          }
  
          if (!computeUnit->localMemoryPipe.
-            isLMReqFIFOWrRdy(rd_lm_reqs_in_pipe + wr_lm_reqs_in_pipe)) {
+            isLMReqFIFOWrRdy(rdLmReqsInPipe + wrLmReqsInPipe)) {
              // Can we insert a new request to the LDS Request FIFO?
              return 0;
          }
@@ -636,8 +636,8 @@ Wavefront::updateResources()
                                             ticks(computeUnit->issuePeriod));
      } else if (ii->opType() == Enums::OT_FLAT_READ) {
          assert(Enums::SC_NONE != ii->executedAs());
-        mem_reqs_in_pipe++;
-        rd_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        rdGmReqsInPipe++;
          if ( Enums::SC_SHARED == ii->executedAs() ) {
              computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
                  preset(computeUnit->shader->ticks(4));
@@ -651,8 +651,8 @@ Wavefront::updateResources()
          }
      } else if (ii->opType() == Enums::OT_FLAT_WRITE) {
          assert(Enums::SC_NONE != ii->executedAs());
-        mem_reqs_in_pipe++;
-        wr_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrGmReqsInPipe++;
          if (Enums::SC_SHARED == ii->executedAs()) {
              computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
                  preset(computeUnit->shader->ticks(8));
@@ -665,67 +665,67 @@ Wavefront::updateResources()
                  preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
          }
      } else if (IS_OT_READ_GM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        rd_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        rdGmReqsInPipe++;
          computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
              preset(computeUnit->shader->ticks(4));
          computeUnit->wfWait[computeUnit->GlbMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_WRITE_GM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        wr_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrGmReqsInPipe++;
          computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
              preset(computeUnit->shader->ticks(8));
          computeUnit->wfWait[computeUnit->GlbMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_ATOMIC_GM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        wr_gm_reqs_in_pipe++;
-        rd_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrGmReqsInPipe++;
+        rdGmReqsInPipe++;
          computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
              preset(computeUnit->shader->ticks(8));
          computeUnit->wfWait[computeUnit->GlbMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_READ_LM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        rd_lm_reqs_in_pipe++;
+        memReqsInPipe++;
+        rdLmReqsInPipe++;
          computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
              preset(computeUnit->shader->ticks(4));
          computeUnit->wfWait[computeUnit->ShrMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_WRITE_LM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        wr_lm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrLmReqsInPipe++;
          computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
              preset(computeUnit->shader->ticks(8));
          computeUnit->wfWait[computeUnit->ShrMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_ATOMIC_LM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        wr_lm_reqs_in_pipe++;
-        rd_lm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrLmReqsInPipe++;
+        rdLmReqsInPipe++;
          computeUnit->vrfToLocalMemPipeBus[computeUnit->nextLocRdBus()].
              preset(computeUnit->shader->ticks(8));
          computeUnit->wfWait[computeUnit->ShrMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_READ_PM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        rd_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        rdGmReqsInPipe++;
          computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
              preset(computeUnit->shader->ticks(4));
          computeUnit->wfWait[computeUnit->GlbMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_WRITE_PM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        wr_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrGmReqsInPipe++;
          computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
              preset(computeUnit->shader->ticks(8));
          computeUnit->wfWait[computeUnit->GlbMemUnitId()].
              preset(computeUnit->shader->ticks(computeUnit->issuePeriod));
      } else if (IS_OT_ATOMIC_PM(ii->opType())) {
-        mem_reqs_in_pipe++;
-        wr_gm_reqs_in_pipe++;
-        rd_gm_reqs_in_pipe++;
+        memReqsInPipe++;
+        wrGmReqsInPipe++;
+        rdGmReqsInPipe++;
          computeUnit->vrfToGlobalMemPipeBus[computeUnit->nextGlbRdBus()].
              preset(computeUnit->shader->ticks(8));
          computeUnit->wfWait[computeUnit->GlbMemUnitId()].
@@ -865,7 +865,7 @@ Wavefront::exec()
  bool
  Wavefront::waitingAtBarrier(int lane)
  {
-    return bar_cnt[lane] < max_bar_cnt;
+    return barCnt[lane] < maxBarCnt;
  }
  
  void
diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh

index 5a5386a3d4cedd93dd194c81d991e0cd5028ba97..db2e434a941fd22ef635849febdfdd1b026d46a8 100644 (file)
--- a/src/gpu-compute/wavefront.hh
+++ b/src/gpu-compute/wavefront.hh
@@ -155,16 +155,16 @@ class Wavefront : public SimObject
      enum status_e {S_STOPPED,S_RETURNING,S_RUNNING};
  
      // Base pointer for array of instruction pointers
-    uint64_t base_ptr;
+    uint64_t basePtr;
  
-    uint32_t old_barrier_cnt;
-    uint32_t barrier_cnt;
-    uint32_t barrier_id;
-    uint32_t barrier_slots;
+    uint32_t oldBarrierCnt;
+    uint32_t barrierCnt;
+    uint32_t barrierId;
+    uint32_t barrierSlots;
      status_e status;
      // HW slot id where the WF is mapped to inside a SIMD unit
      int wfSlotId;
-    int kern_id;
+    int kernId;
      // SIMD unit where the WV has been scheduled
      int simdId;
      // pointer to parent CU
@@ -193,37 +193,37 @@ class Wavefront : public SimObject
      bool isOldestInstALU();
      bool isOldestInstBarrier();
      // used for passing spill address to DDInstGPU
-    std::vector<Addr> last_addr;
-    std::vector<uint32_t> workitemid[3];
-    std::vector<uint32_t> workitemFlatId;
-    uint32_t workgroupid[3];
-    uint32_t workgroupsz[3];
-    uint32_t gridsz[3];
-    uint32_t wg_id;
-    uint32_t wg_sz;
-    uint32_t dynwaveid;
-    uint32_t maxdynwaveid;
-    uint32_t dispatchid;
+    std::vector<Addr> lastAddr;
+    std::vector<uint32_t> workItemId[3];
+    std::vector<uint32_t> workItemFlatId;
+    uint32_t workGroupId[3];
+    uint32_t workGroupSz[3];
+    uint32_t gridSz[3];
+    uint32_t wgId;
+    uint32_t wgSz;
+    uint32_t dynWaveId;
+    uint32_t maxDynWaveId;
+    uint32_t dispatchId;
      // outstanding global+local memory requests
-    uint32_t outstanding_reqs;
+    uint32_t outstandingReqs;
      // memory requests between scoreboard
      // and execute stage not yet executed
-    uint32_t mem_reqs_in_pipe;
+    uint32_t memReqsInPipe;
      // outstanding global memory write requests
-    uint32_t outstanding_reqs_wr_gm;
+    uint32_t outstandingReqsWrGm;
      // outstanding local memory write requests
-    uint32_t outstanding_reqs_wr_lm;
+    uint32_t outstandingReqsWrLm;
      // outstanding global memory read requests
-    uint32_t outstanding_reqs_rd_gm;
+    uint32_t outstandingReqsRdGm;
      // outstanding local memory read requests
-    uint32_t outstanding_reqs_rd_lm;
-    uint32_t rd_lm_reqs_in_pipe;
-    uint32_t rd_gm_reqs_in_pipe;
-    uint32_t wr_lm_reqs_in_pipe;
-    uint32_t wr_gm_reqs_in_pipe;
-
-    int mem_trace_busy;
-    uint64_t last_trace;
+    uint32_t outstandingReqsRdLm;
+    uint32_t rdLmReqsInPipe;
+    uint32_t rdGmReqsInPipe;
+    uint32_t wrLmReqsInPipe;
+    uint32_t wrGmReqsInPipe;
+
+    int memTraceBusy;
+    uint64_t lastTrace;
      // number of vector registers reserved by WF
      int reservedVectorRegs;
      // Index into the Vector Register File's namespace where the WF's registers
@@ -231,25 +231,25 @@ class Wavefront : public SimObject
      uint32_t startVgprIndex;
  
      // Old value of destination gpr (for trace)
-    std::vector<uint32_t> old_vgpr;
+    std::vector<uint32_t> oldVgpr;
      // Id of destination gpr (for trace)
-    uint32_t old_vgpr_id;
+    uint32_t oldVgprId;
      // Tick count of last old_vgpr copy
-    uint64_t old_vgpr_tcnt;
+    uint64_t oldVgprTcnt;
  
      // Old value of destination gpr (for trace)
-    std::vector<uint64_t> old_dgpr;
+    std::vector<uint64_t> oldDgpr;
      // Id of destination gpr (for trace)
-    uint32_t old_dgpr_id;
+    uint32_t oldDgprId;
      // Tick count of last old_vgpr copy
-    uint64_t old_dgpr_tcnt;
+    uint64_t oldDgprTcnt;
  
      // Execution mask at wavefront start
-    VectorMask init_mask;
+    VectorMask initMask;
  
      // number of barriers this WF has joined
-    std::vector<int> bar_cnt;
-    int max_bar_cnt;
+    std::vector<int> barCnt;
+    int maxBarCnt;
      // Flag to stall a wave on barrier
      bool stalledAtBarrier;
  
@@ -333,7 +333,7 @@ class Wavefront : public SimObject
      int ready(itype_e type);
      bool instructionBufferHasBranch();
      void regStats();
-    VectorMask get_pred() { return execMask() & init_mask; }
+    VectorMask getPred() { return execMask() & initMask; }
  
      bool waitingAtBarrier(int lane);
author	Alexandru Dutu <alexandru.dutu@amd.com>
	Fri, 16 Sep 2016 16:26:52 +0000 (12:26 -0400)
committer	Alexandru Dutu <alexandru.dutu@amd.com>
	Fri, 16 Sep 2016 16:26:52 +0000 (12:26 -0400)
src/arch/hsail/gen.py		patch \| blob \| history
src/arch/hsail/insts/decl.hh		patch \| blob \| history
src/arch/hsail/insts/main.cc		patch \| blob \| history
src/arch/hsail/insts/mem_impl.hh		patch \| blob \| history
src/arch/hsail/insts/pseudo_inst.cc		patch \| blob \| history
src/gpu-compute/compute_unit.cc		patch \| blob \| history
src/gpu-compute/dispatcher.cc		patch \| blob \| history
src/gpu-compute/fetch_unit.cc		patch \| blob \| history
src/gpu-compute/global_memory_pipeline.cc		patch \| blob \| history
src/gpu-compute/local_memory_pipeline.cc		patch \| blob \| history
src/gpu-compute/wavefront.cc		patch \| blob \| history
src/gpu-compute/wavefront.hh		patch \| blob \| history