arch-gcn3: Remove invalid assert when reading EXEC_LO
author Tony Gutierrez <anthony.gutierrez@amd.com>
Thu, 14 Feb 2019 18:27:55 +0000 (13:27 -0500)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Fri, 17 Jul 2020 16:30:41 +0000 (16:30 +0000)
This assert assumed all reads to EXEC_LO would be
64b, that is, we would always read the entire EXEC
mask. This is invalid as some kernels read only
the low 32b of EXEC.

The write to EXEC_LO is also updated to handle 32b
writes.

Change-Id: Ifeb167578515bf112b1eab70bbf2201a5e936358
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29960
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
src/arch/gcn3/operand.hh

index 960d05eee90aaed47c7d58d64cfb8fb330e132b6..9d28deb3016b87202660813c350e622d9e9be624 100644 (file)
@@ -435,9 +435,10 @@ namespace Gcn3ISA
 
             if (!isScalarReg(_opIdx)) {
                 if (_opIdx == REG_EXEC_LO) {
-                    ScalarRegU64 new_exec_mask_val(0);
+                    ScalarRegU64 new_exec_mask_val
+                        = wf->execMask().to_ullong();
                     std::memcpy((void*)&new_exec_mask_val,
-                        (void*)srfData.data(), sizeof(new_exec_mask_val));
+                        (void*)srfData.data(), sizeof(srfData));
                     VectorMask new_exec_mask(new_exec_mask_val);
                     wf->execMask() = new_exec_mask;
                     DPRINTF(GPUSRF, "Write EXEC\n");
@@ -513,7 +514,6 @@ namespace Gcn3ISA
             switch(_opIdx) {
               case REG_EXEC_LO:
                 {
-                    assert(NumDwords == 2);
                     ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
                         execMask().to_ullong();
                     std::memcpy((void*)srfData.data(), (void*)&exec_mask,