From: Michael LeBeane Date: Thu, 18 Oct 2018 21:45:27 +0000 (-0400) Subject: gpu_compute: Support loading BLIT kernels X-Git-Tag: v20.1.0.0~435 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1d816250f87d950e1dde244c0abc3f846ce37b15;p=gem5.git gpu_compute: Support loading BLIT kernels The BLIT kernels used to implement DMA through the shaders don't fill out all of the standard fields in an amd_kernel_code_t object. This patch modifies the code object parsing logic to support these new kernels. BLIT kernels are used in APUs when using ROCm memcopies for certain size buffers, and are used for dGPUs when the SDMA engines are disabled. Change-Id: Id4e667474d05e311097dbec443def07dfad14a79 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29959 Maintainer: Anthony Gutierrez Tested-by: kokoro Reviewed-by: Matt Sinclair --- diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index b6205ac13..fccc03582 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -100,11 +100,25 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id, machine_code_addr); Addr kern_name_addr(0); - virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10, - (uint8_t*)&kern_name_addr, 0x8); - std::string kernel_name; - virt_proxy.readString(kernel_name, kern_name_addr); + + /** + * BLIT kernels don't have symbol names. BLIT kernels are built-in compute + * kernels issued by ROCm to handle DMAs for dGPUs when the SDMA + * hardware engines are unavailable or explicitly disabled. They can also + * be used to do copies that ROCm thinks would be better performed + * by the shader than the SDMA engines. They are also sometimes used on + * APUs to implement asynchronous memcopy operations from 2 pointers in + * host memory. I have no idea what BLIT stands for. 
+ * */ + if (akc.runtime_loader_kernel_symbol) { + virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10, + (uint8_t*)&kern_name_addr, 0x8); + + virt_proxy.readString(kernel_name, kern_name_addr); + } else { + kernel_name = "Blit kernel"; + } DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str()); diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh index a6917db3e..5fc5e56c2 100644 --- a/src/gpu-compute/hsa_queue_entry.hh +++ b/src/gpu-compute/hsa_queue_entry.hh @@ -88,6 +88,19 @@ class HSAQueueEntry _globalWgId(0), dispatchComplete(false) { + // Precompiled BLIT kernels actually violate the spec a bit + // and don't set many of the required akc fields. For these kernels, + // we need to rip register usage from the resource registers. + // + // We can't get an exact number of registers from the resource + // registers because they round, but we can get an upper bound on it + if (!numVgprs) + numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4; + + // TODO: Granularity changes for GFX9! + if (!numSgprs) + numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8; + initialVgprState.reset(); initialSgprState.reset();