From 550f0203aa06414754dfb694eb18b6ec386aafac Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Thu, 14 Feb 2019 13:27:55 -0500 Subject: [PATCH] arch-gcn3: Remove invalid assert when reading EXEC_LO This assert assumed all reads to EXEC_LO would be 64b, that is, we would always read the entire EXEC mask. This is invalid as some kernels read only the low 32b of EXEC. The write to EXEC_LO is also updated to handle 32b writes. Change-Id: Ifeb167578515bf112b1eab70bbf2201a5e936358 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29960 Maintainer: Anthony Gutierrez Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/gcn3/operand.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/gcn3/operand.hh b/src/arch/gcn3/operand.hh index 960d05eee..9d28deb30 100644 --- a/src/arch/gcn3/operand.hh +++ b/src/arch/gcn3/operand.hh @@ -435,9 +435,10 @@ namespace Gcn3ISA if (!isScalarReg(_opIdx)) { if (_opIdx == REG_EXEC_LO) { - ScalarRegU64 new_exec_mask_val(0); + ScalarRegU64 new_exec_mask_val + = wf->execMask().to_ullong(); std::memcpy((void*)&new_exec_mask_val, - (void*)srfData.data(), sizeof(new_exec_mask_val)); + (void*)srfData.data(), sizeof(srfData)); VectorMask new_exec_mask(new_exec_mask_val); wf->execMask() = new_exec_mask; DPRINTF(GPUSRF, "Write EXEC\n"); @@ -513,7 +514,6 @@ namespace Gcn3ISA switch(_opIdx) { case REG_EXEC_LO: { - assert(NumDwords == 2); ScalarRegU64 exec_mask = _gpuDynInst->wavefront()-> execMask().to_ullong(); std::memcpy((void*)srfData.data(), (void*)&exec_mask, -- 2.30.2