From 83fe4754e756cb2bc17fe9dfc9a02d5804e7def2 Mon Sep 17 00:00:00 2001 From: Michael LeBeane Date: Tue, 23 Apr 2019 14:49:31 -0400 Subject: [PATCH] gpu-compute: Fix Y-dimension ABI decode We currently have a bug in decoding workitem ID from the kernel descriptor with multiple dimensions. The enable_vgpr_workitem_id bits are currently separated into y and z components, when they should be treated as a single 2-bit value, where y is enabled when it is > 0, and z is enabled when it is > 1. The current setup allows a kernel launch with vgprs reserved for the z dimension and not the y dimension, which is incorrect. Change-Id: Iee64b207feb95bcf064898d5db33b8f201e25323 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29965 Maintainer: Anthony Gutierrez Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/gpu-compute/hsa_queue_entry.hh | 4 ++-- src/gpu-compute/kernel_code.hh | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh index 5fc5e56c2..ea7986969 100644 --- a/src/gpu-compute/hsa_queue_entry.hh +++ b/src/gpu-compute/hsa_queue_entry.hh @@ -417,8 +417,8 @@ class HSAQueueEntry * workitem Id in the X dimension is always initialized. 
*/ initialVgprState.set(WorkitemIdX, true); - initialVgprState.set(WorkitemIdY, akc->enable_vgpr_workitem_id_y); - initialVgprState.set(WorkitemIdZ, akc->enable_vgpr_workitem_id_z); + initialVgprState.set(WorkitemIdY, akc->enable_vgpr_workitem_id > 0); + initialVgprState.set(WorkitemIdZ, akc->enable_vgpr_workitem_id > 1); } // name of the kernel associated with the AQL entry diff --git a/src/gpu-compute/kernel_code.hh b/src/gpu-compute/kernel_code.hh index b3560c7e5..680dd720e 100644 --- a/src/gpu-compute/kernel_code.hh +++ b/src/gpu-compute/kernel_code.hh @@ -130,8 +130,7 @@ struct AMDKernelCode uint32_t enable_sgpr_workgroup_id_y : 1; uint32_t enable_sgpr_workgroup_id_z : 1; uint32_t enable_sgpr_workgroup_info : 1; - uint32_t enable_vgpr_workitem_id_y : 1; - uint32_t enable_vgpr_workitem_id_z : 1; + uint32_t enable_vgpr_workitem_id : 2; uint32_t enable_exception_address_watch : 1; uint32_t enable_exception_memory_violation : 1; uint32_t granulated_lds_size : 9; -- 2.30.2