From: Matt Sinclair <Matthew.Sinclair@amd.com>
Date: Tue, 10 Jul 2018 06:58:28 +0000 (-0400)
Subject: arch-gcn3: add support for v_mbcnt_hi and v_mbcnt_lo
X-Git-Tag: v20.1.0.0~448
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1836d58b3640c1830065af81328652d5fc03d81f;p=gem5.git

arch-gcn3: add support for v_mbcnt_hi and v_mbcnt_lo

Change-Id: I1c70fe693c904f1abd7d5a2b99220c74a075eae5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29948
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---

diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 79e7ddacf..6ffd049f2 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -30309,8 +30309,36 @@ namespace Gcn3ISA
     void
     Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+        uint64_t threadMask = 0;
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                threadMask = ((1LL << lane) - 1LL);
+                vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
+                             src1[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---
 
     Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
           InFmt_VOP3 *iFmt)
@@ -30330,8 +30358,36 @@ namespace Gcn3ISA
     void
     Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+        uint64_t threadMask = 0;
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                threadMask = ((1LL << lane) - 1LL);
+                vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
+                             src1[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_LSHLREV_B64 class methods ---
 
     Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
         : Inst_VOP3(iFmt, "v_lshlrev_b64", false)