gpu-compute: Fix Y-dimension ABI decode
author: Michael LeBeane <Michael.Lebeane@amd.com>
Tue, 23 Apr 2019 18:49:31 +0000 (14:49 -0400)
committer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Fri, 17 Jul 2020 16:32:56 +0000 (16:32 +0000)
We currently have a bug in decoding workitem ID from the kernel
descriptor with multiple dimensions.  The enable_vgpr_workitem_id bits
are currently separated into y and z components, when they should be
treated as a single 2-bit value, where y is enabled when it is > 0,
and z is enabled when it is > 1.  The current setup allows a kernel
launch with vgprs reserved for the z dimension and not the y dimension,
which is incorrect.

Change-Id: Iee64b207feb95bcf064898d5db33b8f201e25323
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29965
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
src/gpu-compute/hsa_queue_entry.hh
src/gpu-compute/kernel_code.hh

index 5fc5e56c2ee98c507361f682e14e5cd68d7d6a15..ea798696958ffe207fa900dbcfcf2b5ca2f41452 100644 (file)
@@ -417,8 +417,8 @@ class HSAQueueEntry
          * workitem Id in the X dimension is always initialized.
          */
         initialVgprState.set(WorkitemIdX, true);
-        initialVgprState.set(WorkitemIdY, akc->enable_vgpr_workitem_id_y);
-        initialVgprState.set(WorkitemIdZ, akc->enable_vgpr_workitem_id_z);
+        initialVgprState.set(WorkitemIdY, akc->enable_vgpr_workitem_id > 0);
+        initialVgprState.set(WorkitemIdZ, akc->enable_vgpr_workitem_id > 1);
     }
 
     // name of the kernel associated with the AQL entry
index b3560c7e5c04fbe77e649298da22b3adeb9b99bb..680dd720e1ba60067d0de1bd6c9c5ce0c75c555f 100644 (file)
@@ -130,8 +130,7 @@ struct AMDKernelCode
     uint32_t enable_sgpr_workgroup_id_y : 1;
     uint32_t enable_sgpr_workgroup_id_z : 1;
     uint32_t enable_sgpr_workgroup_info : 1;
-    uint32_t enable_vgpr_workitem_id_y : 1;
-    uint32_t enable_vgpr_workitem_id_z : 1;
+    uint32_t enable_vgpr_workitem_id : 2;
     uint32_t enable_exception_address_watch : 1;
     uint32_t enable_exception_memory_violation : 1;
     uint32_t granulated_lds_size : 9;