radv/gfx10: fix required ballot size with VK_EXT_subgroup_size_control
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Mar 2020 17:44:18 +0000 (18:44 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 17 Mar 2020 12:45:01 +0000 (12:45 +0000)
If compute shaders require a specific subgroup size (ie. Wave32),
we have to use the correct ballot size.

Fixes dEQP-VK.subgroups.ballot_other.compute.*_requiredsubgroupSize.

Fixes: fb07fd4e6cb ("radv: implement VK_EXT_subgroup_size_control")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4215>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4215>

src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index d833bc2477dc5b945d5f28f8742898782cfe241b..7cb8deddc105f5ca45437317d974b01341e03c61 100644 (file)
@@ -3925,7 +3925,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 
        ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
                             args->options->family, float_mode,
-                            args->shader_info->wave_size, 64);
+                            args->shader_info->wave_size,
+                            args->shader_info->ballot_bit_size);
        ctx.context = ctx.ac.context;
 
        ctx.max_workgroup_size = 0;
index 6d71d89ea588b15b1a1e69e57edbf1dbe7108f04..ef88dfe9468127da496a3ebd194457ea3a03834a 100644 (file)
@@ -2530,6 +2530,17 @@ radv_get_wave_size(struct radv_device *device,
                return device->physical_device->ge_wave_size;
 }
 
+static uint8_t
+radv_get_ballot_bit_size(struct radv_device *device,
+                        const VkPipelineShaderStageCreateInfo *pStage,
+                        gl_shader_stage stage,
+                        const struct radv_shader_variant_key *key)
+{
+       if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
+               return key->cs.subgroup_size;
+       return 64;
+}
+
 static void
 radv_fill_shader_info(struct radv_pipeline *pipeline,
                      const VkPipelineShaderStageCreateInfo **pStages,
@@ -2642,10 +2653,15 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
        }
 
        for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-               if (nir[i])
+               if (nir[i]) {
                        infos[i].wave_size =
                                radv_get_wave_size(pipeline->device, pStages[i],
                                                   i, &keys[i]);
+                       infos[i].ballot_bit_size =
+                               radv_get_ballot_bit_size(pipeline->device,
+                                                        pStages[i], i,
+                                                        &keys[i]);
+               }
        }
 }
 
@@ -2788,7 +2804,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                const VkPipelineShaderStageCreateInfo *stage = pStages[i];
-               unsigned subgroup_size = 64;
+               unsigned subgroup_size = 64, ballot_bit_size = 64;
 
                if (!modules[i])
                        continue;
@@ -2802,13 +2818,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                        assert(device->physical_device->rad_info.chip_class >= GFX10 &&
                               i == MESA_SHADER_COMPUTE);
                        subgroup_size = key->compute_subgroup_size;
+                       ballot_bit_size = key->compute_subgroup_size;
                }
 
                nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                    stage ? stage->pName : "main", i,
                                                    stage ? stage->pSpecializationInfo : NULL,
                                                    flags, pipeline->layout,
-                                                   subgroup_size);
+                                                   subgroup_size, ballot_bit_size);
 
                /* We don't want to alter meta shaders IR directly so clone it
                 * first.
@@ -2888,6 +2905,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                                                  pipeline->layout, &key,
                                                  &info);
                        info.wave_size = 64; /* Wave32 not supported. */
+                       info.ballot_bit_size = 64;
 
                        pipeline->gs_copy_shader = radv_create_gs_copy_shader(
                                        device, nir[MESA_SHADER_GEOMETRY], &info,
index 4132dce1aee723b87940b64584a5c531a8bc9a16..98c98db5665402303bb8bc3216a44cfc64cd09f1 100644 (file)
@@ -293,7 +293,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                           const VkSpecializationInfo *spec_info,
                           const VkPipelineCreateFlags flags,
                           const struct radv_pipeline_layout *layout,
-                          unsigned subgroup_size)
+                          unsigned subgroup_size, unsigned ballot_bit_size)
 {
        nir_shader *nir;
        const nir_shader_compiler_options *nir_options =
@@ -483,7 +483,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
        bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
        nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
                        .subgroup_size = subgroup_size,
-                       .ballot_bit_size = 64,
+                       .ballot_bit_size = ballot_bit_size,
                        .lower_to_scalar = 1,
                        .lower_subgroup_masks = 1,
                        .lower_shuffle = 1,
index 255e4ee277c587e88929002e7a6ded0375ffffe8..99644b1ebf016bd3448b6b7e520e0f64b8d41eec 100644 (file)
@@ -236,6 +236,7 @@ struct radv_shader_info {
        bool uses_invocation_id;
        bool uses_prim_id;
        uint8_t wave_size;
+       uint8_t ballot_bit_size;
        struct radv_userdata_locations user_sgprs_locs;
        unsigned num_user_sgprs;
        unsigned num_input_sgprs;
@@ -404,7 +405,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                           const VkSpecializationInfo *spec_info,
                           const VkPipelineCreateFlags flags,
                           const struct radv_pipeline_layout *layout,
-                          unsigned subgroup_size);
+                          unsigned subgroup_size, unsigned ballot_bit_size);
 
 void *
 radv_alloc_shader_memory(struct radv_device *device,