arch-gcn3: Implement s_buffer_load_dwordx16
author Tony Gutierrez <anthony.gutierrez@amd.com>
Fri, 7 Dec 2018 00:24:19 +0000 (19:24 -0500)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Fri, 17 Jul 2020 16:13:17 +0000 (16:13 +0000)
Change-Id: I25382dcae9bb55eaf035385fa925157f25d39c20
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29957
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
src/arch/gcn3/insts/instructions.cc
src/arch/gcn3/insts/op_encodings.hh

index 567cc1024bb36347d9915d2cab6a5316f5e6d03a..71efd8fda2bc471340409c7427afaebeec96dff0 100644 (file)
@@ -4857,17 +4857,45 @@ namespace Gcn3ISA
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+        ScalarRegU32 offset(0); // overwritten by one of the branches below
+        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
+        // Read the resource descriptor operand selected by SBASE.
+        rsrcDesc.read();
+        // IMM set: OFFSET is the offset; else OFFSET names a scalar operand.
+        if (instData.IMM) {
+            offset = extData.OFFSET;
+        } else {
+            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
+            off_sgpr.read();
+            offset = off_sgpr.rawData();
+        }
+        // The U128 calcAddr overload stores into gpuDynInst->scalarAddr.
+        calcAddr(gpuDynInst, rsrcDesc, offset);
+        // Push this instruction onto the scalar memory pipe's request FIFO.
+        gpuDynInst->computeUnit()->scalarMemoryPipe
+            .getGMReqFIFO().push(gpuDynInst);
+        // Adjust the wavefront's request counters, then validate them.
+        wf->scalarRdGmReqsInPipe--;
+        wf->scalarOutstandingReqsRdGm++;
+        gpuDynInst->wavefront()->outstandingReqs++; // same call wf came from
+        gpuDynInst->wavefront()->validateRequestCounters();
+    } // execute
 
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
+        initMemRead<16>(gpuDynInst); // 16: presumably dword count (confirm)
     } // initiateAcc
 
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
     {
+        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); // dst from SDATA
+        sdst.write(); // presumably writes back the loaded data (confirm)
     } // completeAcc
 
     Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
index 4056f0a9376ee6460b561b2d092d05415c982759..202dd1d822115295fa5632ddd1eda4f2a5617926 100644 (file)
 
 namespace Gcn3ISA
 {
+    struct BufferRsrcDescriptor // filled by memcpy from a U128 operand
+    {
+        uint64_t baseAddr : 48; // base that calcAddr adds the offset to
+        uint32_t stride : 14; // zero vs. non-zero picks the clamp rule
+        uint32_t cacheSwizzle : 1;
+        uint32_t swizzleEn : 1; // widths up to here total 64 bits
+        uint32_t numRecords : 32; // bound used when clamping the offset
+        uint32_t dstSelX : 3; // from here on: not read by calcAddr
+        uint32_t dstSelY : 3;
+        uint32_t dstSelZ : 3;
+        uint32_t dstSelW : 3;
+        uint32_t numFmt : 3;
+        uint32_t dataFmt : 4;
+        uint32_t elemSize : 2;
+        uint32_t idxStride : 2;
+        uint32_t addTidEn : 1;
+        uint32_t atc : 1;
+        uint32_t hashEn : 1;
+        uint32_t heap : 1;
+        uint32_t mType : 3;
+        uint32_t type : 2; // all widths together total 128 bits
+    };
+
     // --- purely virtual instruction classes ---
 
     class Inst_SOP2 : public GCN3GPUStaticInst
@@ -197,14 +220,45 @@ namespace Gcn3ISA
                                                     MemCmd::WriteReq);
         }
 
+        /**
+         * For normal s_load_dword/s_store_dword instruction addresses.
+         *
+         * Adds offset to the raw value of addr, clears the two low-order
+         * bits of the sum so the result is 4-byte aligned, and stores it
+         * in gpu_dyn_inst->scalarAddr.
+         *
+         * @param gpu_dyn_inst dynamic instruction whose scalarAddr is set
+         * @param addr scalar operand whose rawData() supplies the base
+         * @param offset value added to the base before alignment
+         */
+        void
+        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
+                 ScalarRegU32 offset)
+        {
+            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
+            gpu_dyn_inst->scalarAddr = vaddr;
+        }
+
+        /**
+         * For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
+         * The s_buffer instructions use the same buffer resource descriptor
+         * as the MUBUF instructions.
+         */
         void
-        calcAddr(GPUDynInstPtr gpuDynInst, ConstScalarOperandU64 &addr,
-            ScalarRegU32 offset)
+        calcAddr(GPUDynInstPtr gpu_dyn_inst,
+                 ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
         {
-            Addr vaddr = addr.rawData();
-            vaddr += offset;
-            vaddr &= ~0x3;
-            gpuDynInst->scalarAddr = vaddr;
+            BufferRsrcDescriptor rsrc_desc;
+            ScalarRegU32 clamped_offset(offset);
+            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
+                        sizeof(BufferRsrcDescriptor));
+
+            /**
+             * The address is clamped if:
+             *     Stride is zero: clamp if offset >= num_records
+             *     Stride is non-zero: clamp if offset > (stride * num_records)
+             */
+            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
+                clamped_offset = rsrc_desc.numRecords;
+            } else if (rsrc_desc.stride && offset
+                       > (rsrc_desc.stride * rsrc_desc.numRecords)) {
+                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
+            }
+
+            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
+            gpu_dyn_inst->scalarAddr = vaddr;
         }
 
         // first instruction DWORD
@@ -469,29 +523,6 @@ namespace Gcn3ISA
         int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
 
       protected:
-        struct BufferRsrcDescriptor
-        {
-            uint64_t baseAddr : 48;
-            uint32_t stride : 14;
-            uint32_t cacheSwizzle : 1;
-            uint32_t swizzleEn : 1;
-            uint32_t numRecords : 32;
-            uint32_t dstSelX : 3;
-            uint32_t dstSelY : 3;
-            uint32_t dstSelZ : 3;
-            uint32_t dstSelW : 3;
-            uint32_t numFmt : 3;
-            uint32_t dataFmt : 4;
-            uint32_t elemSize : 2;
-            uint32_t idxStride : 2;
-            uint32_t addTidEn : 1;
-            uint32_t atc : 1;
-            uint32_t hashEn : 1;
-            uint32_t heap : 1;
-            uint32_t mType : 3;
-            uint32_t type : 2;
-        };
-
         template<typename T>
         void
         initMemRead(GPUDynInstPtr gpuDynInst)