From: Tony Gutierrez
Date: Fri, 7 Dec 2018 00:24:19 +0000 (-0500)
Subject: arch-gcn3: Implement s_buffer_load_dwordx16
X-Git-Tag: v20.1.0.0~437
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=513e75d99a47d3cf62c9d519ca51c55453c9e10e;p=gem5.git

arch-gcn3: Implement s_buffer_load_dwordx16

Change-Id: I25382dcae9bb55eaf035385fa925157f25d39c20
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29957
Maintainer: Anthony Gutierrez
Tested-by: kokoro
Reviewed-by: Matt Sinclair
---

diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 567cc1024..71efd8fda 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -4857,17 +4857,45 @@ namespace Gcn3ISA
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
-    }
+        Wavefront *wf = gpuDynInst->wavefront();
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+        ScalarRegU32 offset(0);
+        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
+
+        rsrcDesc.read();
+
+        if (instData.IMM) {
+            offset = extData.OFFSET;
+        } else {
+            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
+            off_sgpr.read();
+            offset = off_sgpr.rawData();
+        }
+
+        calcAddr(gpuDynInst, rsrcDesc, offset);
+
+        gpuDynInst->computeUnit()->scalarMemoryPipe
+            .getGMReqFIFO().push(gpuDynInst);
+
+        wf->scalarRdGmReqsInPipe--;
+        wf->scalarOutstandingReqsRdGm++;
+        gpuDynInst->wavefront()->outstandingReqs++;
+        gpuDynInst->wavefront()->validateRequestCounters();
+    } // execute
 
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
+        initMemRead<16>(gpuDynInst);
     } // initiateAcc
 
     void
     Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
     {
+        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
+        sdst.write();
     } // completeAcc
 
     Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
diff --git a/src/arch/gcn3/insts/op_encodings.hh b/src/arch/gcn3/insts/op_encodings.hh
index 4056f0a93..202dd1d82 100644
--- a/src/arch/gcn3/insts/op_encodings.hh
+++ b/src/arch/gcn3/insts/op_encodings.hh
@@ -46,6 +46,29 @@
 
 namespace Gcn3ISA
 {
+    struct BufferRsrcDescriptor
+    {
+        uint64_t baseAddr : 48;
+        uint32_t stride : 14;
+        uint32_t cacheSwizzle : 1;
+        uint32_t swizzleEn : 1;
+        uint32_t numRecords : 32;
+        uint32_t dstSelX : 3;
+        uint32_t dstSelY : 3;
+        uint32_t dstSelZ : 3;
+        uint32_t dstSelW : 3;
+        uint32_t numFmt : 3;
+        uint32_t dataFmt : 4;
+        uint32_t elemSize : 2;
+        uint32_t idxStride : 2;
+        uint32_t addTidEn : 1;
+        uint32_t atc : 1;
+        uint32_t hashEn : 1;
+        uint32_t heap : 1;
+        uint32_t mType : 3;
+        uint32_t type : 2;
+    };
+
     // --- purely virtual instruction classes ---
 
     class Inst_SOP2 : public GCN3GPUStaticInst
@@ -197,14 +220,45 @@ namespace Gcn3ISA
                                                     MemCmd::WriteReq);
         }
 
+        /**
+         * For normal s_load_dword/s_store_dword instruction addresses.
+         */
+        void
+        calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr,
+            ScalarRegU32 offset)
+        {
+            Addr vaddr = ((addr.rawData() + offset) & ~0x3);
+            gpu_dyn_inst->scalarAddr = vaddr;
+        }
+
+        /**
+         * For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
+         * The s_buffer instructions use the same buffer resource descriptor
+         * as the MUBUF instructions.
+         */
         void
-        calcAddr(GPUDynInstPtr gpuDynInst, ConstScalarOperandU64 &addr,
-            ScalarRegU32 offset)
+        calcAddr(GPUDynInstPtr gpu_dyn_inst,
+            ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
         {
-            Addr vaddr = addr.rawData();
-            vaddr += offset;
-            vaddr &= ~0x3;
-            gpuDynInst->scalarAddr = vaddr;
+            BufferRsrcDescriptor rsrc_desc;
+            ScalarRegU32 clamped_offset(offset);
+            std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
+                        sizeof(BufferRsrcDescriptor));
+
+            /**
+             * The address is clamped if:
+             *     Stride is zero: clamp if offset >= num_records
+             *     Stride is non-zero: clamp if offset > (stride * num_records)
+             */
+            if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
+                clamped_offset = rsrc_desc.numRecords;
+            } else if (rsrc_desc.stride && offset
+                       > (rsrc_desc.stride * rsrc_desc.numRecords)) {
+                clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
+            }
+
+            Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
+            gpu_dyn_inst->scalarAddr = vaddr;
         }
 
         // first instruction DWORD
@@ -469,29 +523,6 @@ namespace Gcn3ISA
         int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
 
       protected:
-        struct BufferRsrcDescriptor
-        {
-            uint64_t baseAddr : 48;
-            uint32_t stride : 14;
-            uint32_t cacheSwizzle : 1;
-            uint32_t swizzleEn : 1;
-            uint32_t numRecords : 32;
-            uint32_t dstSelX : 3;
-            uint32_t dstSelY : 3;
-            uint32_t dstSelZ : 3;
-            uint32_t dstSelW : 3;
-            uint32_t numFmt : 3;
-            uint32_t dataFmt : 4;
-            uint32_t elemSize : 2;
-            uint32_t idxStride : 2;
-            uint32_t addTidEn : 1;
-            uint32_t atc : 1;
-            uint32_t hashEn : 1;
-            uint32_t heap : 1;
-            uint32_t mType : 3;
-            uint32_t type : 2;
-        };
-
         template
         void
         initMemRead(GPUDynInstPtr gpuDynInst)