arch-gcn3: Implementation of flat atomic swap instruction

author Alexandru Dutu <alexandru.dutu@amd.com>

Thu, 9 Nov 2017 07:20:54 +0000 (02:20 -0500)

committer Anthony Gutierrez <anthony.gutierrez@amd.com>

Mon, 13 Jul 2020 23:32:27 +0000 (23:32 +0000)
author Alexandru Dutu <alexandru.dutu@amd.com>
Thu, 9 Nov 2017 07:20:54 +0000 (02:20 -0500)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Mon, 13 Jul 2020 23:32:27 +0000 (23:32 +0000)
diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc

index 2e39bf5c40d44cb2597d200c73ec05870d7729d8..607e3c6f20eee6edf1129e5f6b0b2fda893eaa48 100644 (file)
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -39231,8 +39231,80 @@ namespace Gcn3ISA
      void
      Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
      {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        // No lane is active: decrement both in-pipe counters and issue nothing.
+        if (wf->execMask().none()) {
+            wf->wrGmReqsInPipe--;
+            wf->rdGmReqsInPipe--;
+            return;
+        }
+        // Copy the wavefront's exec unit id and exec mask into the instruction.
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->exec_mask = wf->execMask();
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+        // Read the ADDR vector operand and hand it to calcAddr().
+        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+
+        addr.read();
+
+        calcAddr(gpuDynInst, addr);
+        // Only SC_GLOBAL is issued; SC_PRIVATE panics, anything else is fatal.
+        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
+            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
+            // TODO: additional address computation required for scratch
+            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
+                     "Flats to private aperture not tested yet\n");
+            gpuDynInst->computeUnit()->globalMemoryPipe.
+                issueRequest(gpuDynInst);
+            wf->wrGmReqsInPipe--; // write side: in-pipe -> outstanding
+            wf->outstandingReqsWrGm++;
+            wf->rdGmReqsInPipe--; // read side: in-pipe -> outstanding
+            wf->outstandingReqsRdGm++;
+        } else {
+            fatal("Non global flat instructions not implemented yet.\n");
+        }
+
+        gpuDynInst->wavefront()->outstandingReqs++;
+        gpuDynInst->wavefront()->validateRequestCounters();
+        // NOTE(review): DATA is staged after issueRequest(); confirm this is safe.
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+
+        data.read();
+        // Copy DATA into a_data for every lane enabled in the exec mask.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+    } // execute: issues the swap to the global memory pipe and stages a_data
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
+    { // Forwards to initAtomicAccess, instantiated for VecElemU32 elements.
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
+    { // Writes d_data back to VDST; does nothing unless isAtomicRet() is true.
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDST);
+            // d_data presumably holds the pre-swap memory values -- confirm.
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) { // skip inactive lanes
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
+
+    // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
  
      Inst_FLAT__FLAT_ATOMIC_CMPSWAP
          ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
diff --git a/src/arch/gcn3/insts/instructions.hh b/src/arch/gcn3/insts/instructions.hh

index ff0cfea85c8e6ce65ecac52cfaea36f64b359d08..b0cc37e8f1a0d8d7af2a260edf4add202e6617fb 100644 (file)
--- a/src/arch/gcn3/insts/instructions.hh
+++ b/src/arch/gcn3/insts/instructions.hh
@@ -79949,9 +79949,9 @@ namespace Gcn3ISA
                case 0: //vgpr_addr
                  return 8;
                case 1: //vgpr_src
-                return 32;
+                return 4;
                case 2: //vgpr_dst
-                return 32;
+                return 4;
                default:
                  fatal("op idx %i out of bounds\n", opIdx);
                  return -1;
@@ -79991,6 +79991,8 @@ namespace Gcn3ISA
          } // isDstOperand
  
          void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
      }; // Inst_FLAT__FLAT_ATOMIC_SWAP
  
      class Inst_FLAT__FLAT_ATOMIC_CMPSWAP : public Inst_FLAT
author	Alexandru Dutu <alexandru.dutu@amd.com>
	Thu, 9 Nov 2017 07:20:54 +0000 (02:20 -0500)
committer	Anthony Gutierrez <anthony.gutierrez@amd.com>
	Mon, 13 Jul 2020 23:32:27 +0000 (23:32 +0000)
src/arch/gcn3/insts/instructions.cc		patch \| blob \| history
src/arch/gcn3/insts/instructions.hh		patch \| blob \| history