From c923de68dd0ab10a5a5fb3196f539707d046d897 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 16 Mar 2020 18:44:18 +0100 Subject: [PATCH] radv/gfx10: fix required ballot size with VK_EXT_subgroup_size_control If compute shaders require a specific subgroup size (ie. Wave32), we have to use the correct ballot size. Fixes dEQP-VK.subgroups.ballot_other.compute.*_requiredsubgroupSize. Fixes: fb07fd4e6cb ("radv: implement VK_EXT_subgroup_size_control") Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Tested-by: Marge Bot Part-of: --- src/amd/vulkan/radv_nir_to_llvm.c | 3 ++- src/amd/vulkan/radv_pipeline.c | 24 +++++++++++++++++++++--- src/amd/vulkan/radv_shader.c | 4 ++-- src/amd/vulkan/radv_shader.h | 3 ++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index d833bc2477d..7cb8deddc10 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3925,7 +3925,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family, float_mode, - args->shader_info->wave_size, 64); + args->shader_info->wave_size, + args->shader_info->ballot_bit_size); ctx.context = ctx.ac.context; ctx.max_workgroup_size = 0; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 6d71d89ea58..ef88dfe9468 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2530,6 +2530,17 @@ radv_get_wave_size(struct radv_device *device, return device->physical_device->ge_wave_size; } +static uint8_t +radv_get_ballot_bit_size(struct radv_device *device, + const VkPipelineShaderStageCreateInfo *pStage, + gl_shader_stage stage, + const struct radv_shader_variant_key *key) +{ + if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size) + return key->cs.subgroup_size; + return 64; +} + static void radv_fill_shader_info(struct radv_pipeline *pipeline, const VkPipelineShaderStageCreateInfo **pStages, @@ -2642,10 +2653,15 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, } for (int i = 0; i < MESA_SHADER_STAGES; i++) { - if (nir[i]) + if (nir[i]) { infos[i].wave_size = radv_get_wave_size(pipeline->device, pStages[i], i, &keys[i]); + infos[i].ballot_bit_size = + radv_get_ballot_bit_size(pipeline->device, + pStages[i], i, + &keys[i]); + } } } @@ -2788,7 +2804,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { const VkPipelineShaderStageCreateInfo *stage = pStages[i]; - unsigned subgroup_size = 64; + unsigned subgroup_size = 64, ballot_bit_size = 64; if (!modules[i]) continue; @@ -2802,13 +2818,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline, assert(device->physical_device->rad_info.chip_class >= GFX10 && i == MESA_SHADER_COMPUTE); subgroup_size = key->compute_subgroup_size; + ballot_bit_size = key->compute_subgroup_size; } nir[i] = radv_shader_compile_to_nir(device, modules[i], stage ? stage->pName : "main", i, stage ? stage->pSpecializationInfo : NULL, flags, pipeline->layout, - subgroup_size); + subgroup_size, ballot_bit_size); /* We don't want to alter meta shaders IR directly so clone it * first. @@ -2888,6 +2905,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, pipeline->layout, &key, &info); info.wave_size = 64; /* Wave32 not supported. */ + info.ballot_bit_size = 64; pipeline->gs_copy_shader = radv_create_gs_copy_shader( device, nir[MESA_SHADER_GEOMETRY], &info, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 4132dce1aee..98c98db5665 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -293,7 +293,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const VkSpecializationInfo *spec_info, const VkPipelineCreateFlags flags, const struct radv_pipeline_layout *layout, - unsigned subgroup_size) + unsigned subgroup_size, unsigned ballot_bit_size) { nir_shader *nir; const nir_shader_compiler_options *nir_options = @@ -483,7 +483,7 @@ radv_shader_compile_to_nir(struct radv_device *device, bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7; nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { .subgroup_size = subgroup_size, - .ballot_bit_size = 64, + .ballot_bit_size = ballot_bit_size, .lower_to_scalar = 1, .lower_subgroup_masks = 1, .lower_shuffle = 1, diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 255e4ee277c..99644b1ebf0 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -236,6 +236,7 @@ struct radv_shader_info { bool uses_invocation_id; bool uses_prim_id; uint8_t wave_size; + uint8_t ballot_bit_size; struct radv_userdata_locations user_sgprs_locs; unsigned num_user_sgprs; unsigned num_input_sgprs; @@ -404,7 +405,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const VkSpecializationInfo *spec_info, const VkPipelineCreateFlags flags, const struct radv_pipeline_layout *layout, - unsigned subgroup_size); + unsigned subgroup_size, unsigned ballot_bit_size); void * radv_alloc_shader_memory(struct radv_device *device, -- 2.30.2