From 8c3e9a19d5c9bdc26b6b06cf0440cff0f1cd89b5 Mon Sep 17 00:00:00 2001
From: Alexandru Dutu
Date: Wed, 10 Apr 2019 11:34:37 -0400
Subject: [PATCH] arch-gcn3: Updating implementation of atomics

This changeset moves the access of the data operand from initiateAcc to
the execute method of atomic instructions.

Change-Id: I1debae302f0b13f79ed2b7a9ed2f6b07fcec5128
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29926
Reviewed-by: Anthony Gutierrez
Maintainer: Anthony Gutierrez
Tested-by: kokoro
---
 src/arch/gcn3/insts/instructions.cc | 97 +++++++++++++----------------
 1 file changed, 45 insertions(+), 52 deletions(-)

diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc
index 26af2415f..32719ad27 100644
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -39261,11 +39261,24 @@ namespace Gcn3ISA
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
 
         addr.read();
+        data.read();
+        cmp.read();
 
         calcAddr(gpuDynInst, addr);
 
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
+                    = data[lane];
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = cmp[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
             gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
             /**
@@ -39293,21 +39306,6 @@ namespace Gcn3ISA
     void
     Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
-        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
-
-        data.read();
-        cmp.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
-                    = data[lane];
-                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
-                    = cmp[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU32>(gpuDynInst);
     } // initiateAcc
 
@@ -39364,11 +39362,20 @@ namespace Gcn3ISA
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
         addr.read();
+        data.read();
 
         calcAddr(gpuDynInst, addr);
 
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
             gpuDynInst->computeUnit()->globalMemoryPipe.
                 issueRequest(gpuDynInst);
@@ -39387,17 +39394,6 @@ namespace Gcn3ISA
     void
     Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
-
-        data.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
-                    = data[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU32>(gpuDynInst);
     } // initiateAcc
 
@@ -39733,11 +39729,24 @@ namespace Gcn3ISA
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
+        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
 
         addr.read();
+        data.read();
+        cmp.read();
 
         calcAddr(gpuDynInst, addr);
 
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
+                    = data[lane];
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
+                    = cmp[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
             gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
             /**
@@ -39765,21 +39774,6 @@ namespace Gcn3ISA
     void
     Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
-        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
-
-        data.read();
-        cmp.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
-                    = data[lane];
-                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
-                    = cmp[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU64>(gpuDynInst);
     } // initiateAcc
 
@@ -39837,10 +39831,20 @@ namespace Gcn3ISA
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
         ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
 
         addr.read();
+        data.read();
 
         calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
         if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
             gpuDynInst->computeUnit()->globalMemoryPipe.
                 issueRequest(gpuDynInst);
@@ -39859,17 +39863,6 @@ namespace Gcn3ISA
     void
     Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
-        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
-
-        data.read();
-
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (gpuDynInst->exec_mask[lane]) {
-                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
-                    = data[lane];
-            }
-        }
-
         initAtomicAccess<VecElemU64>(gpuDynInst);
     } // initiateAcc
-- 
2.30.2
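
As a rough illustration of the pattern the patch applies (not the actual gem5 classes), the standalone C++ sketch below uses simplified stand-ins for GPUDynInst, the vector operand type, and the lane count. It shows the operand staging now done in execute(): each active lane's data (and compare value, for CMPSWAP) is copied into the instruction's x_data/a_data buffers, so that initiateAcc() only has to start the atomic memory access.

    // Simplified sketch: operand staging in execute() vs. issuing in
    // initiateAcc(). All types below are stand-ins, not gem5's real API.
    #include <array>
    #include <bitset>
    #include <cstdint>
    #include <iostream>

    constexpr int NumVecElemPerVecReg = 64;   // one wavefront's worth of lanes
    using VecElemU32 = uint32_t;

    // Stand-in for gem5's GPUDynInst: a per-lane execution mask plus raw
    // staging buffers for the swap (x_data) and add/compare (a_data) operands.
    struct GPUDynInst {
        std::bitset<NumVecElemPerVecReg> exec_mask;
        alignas(VecElemU32) uint8_t x_data[NumVecElemPerVecReg * sizeof(VecElemU32)] = {};
        alignas(VecElemU32) uint8_t a_data[NumVecElemPerVecReg * sizeof(VecElemU32)] = {};
    };

    // Stand-in for a vector register operand already read from the register file.
    using VecOperandU32 = std::array<VecElemU32, NumVecElemPerVecReg>;

    // execute(): stage the operands per active lane, as the patch now does for
    // FLAT_ATOMIC_CMPSWAP / FLAT_ATOMIC_ADD; address calculation and issuing to
    // the memory pipeline would follow here.
    void execute(GPUDynInst &inst, const VecOperandU32 &data, const VecOperandU32 &cmp)
    {
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (inst.exec_mask[lane]) {
                reinterpret_cast<VecElemU32 *>(inst.x_data)[lane] = data[lane];
                reinterpret_cast<VecElemU32 *>(inst.a_data)[lane] = cmp[lane];
            }
        }
    }

    // initiateAcc(): after the patch this only starts the atomic access; the
    // operand staging has already happened in execute().
    void initiateAcc(const GPUDynInst &inst)
    {
        std::cout << "issuing atomic; lane 0 swap value = "
                  << reinterpret_cast<const VecElemU32 *>(inst.x_data)[0] << '\n';
    }

    int main()
    {
        GPUDynInst inst;
        inst.exec_mask.set();            // all 64 lanes active
        VecOperandU32 data{}, cmp{};
        data.fill(42);
        cmp.fill(7);
        execute(inst, data, cmp);
        initiateAcc(inst);
    }

The design point the patch captures is that operand register reads belong with the rest of the execute-stage work, leaving initiateAcc() as a thin hook that launches the already staged atomic request.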