radv/gfx10: fix required subgroup size with VK_EXT_subgroup_size_control
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Mar 2020 16:29:33 +0000 (17:29 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 17 Mar 2020 12:45:01 +0000 (12:45 +0000)
If a compute shader requires a specific subgroup size (i.e. Wave32),
we have to report that size, instead of the hard-coded 64, when
lowering subgroup operations.

Fixes dEQP-VK.subgroups.size_control.compute.required_subgroup_size_*.

Fixes: fb07fd4e6cb ("radv: implement VK_EXT_subgroup_size_control")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4215>

src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index 2bd4e2b4d84f4de0b1f745ae6cb346e9f49ad1f1..6d71d89ea588b15b1a1e69e57edbf1dbe7108f04 100644 (file)
@@ -2788,16 +2788,27 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                const VkPipelineShaderStageCreateInfo *stage = pStages[i];
+               unsigned subgroup_size = 64;
 
                if (!modules[i])
                        continue;
 
                radv_start_feedback(stage_feedbacks[i]);
 
+               if (key->compute_subgroup_size) {
+                       /* Only GFX10+ and compute shaders currently support
+                        * requiring a specific subgroup size.
+                        */
+                       assert(device->physical_device->rad_info.chip_class >= GFX10 &&
+                              i == MESA_SHADER_COMPUTE);
+                       subgroup_size = key->compute_subgroup_size;
+               }
+
                nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                    stage ? stage->pName : "main", i,
                                                    stage ? stage->pSpecializationInfo : NULL,
-                                                   flags, pipeline->layout);
+                                                   flags, pipeline->layout,
+                                                   subgroup_size);
 
                /* We don't want to alter meta shaders IR directly so clone it
                 * first.
index 30e46340520293d40d80f2f210d6fec743c38fba..4132dce1aee723b87940b64584a5c531a8bc9a16 100644 (file)
@@ -292,7 +292,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                           gl_shader_stage stage,
                           const VkSpecializationInfo *spec_info,
                           const VkPipelineCreateFlags flags,
-                          const struct radv_pipeline_layout *layout)
+                          const struct radv_pipeline_layout *layout,
+                          unsigned subgroup_size)
 {
        nir_shader *nir;
        const nir_shader_compiler_options *nir_options =
@@ -481,7 +482,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
        nir_remove_dead_variables(nir, nir_var_function_temp);
        bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
        nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
-                       .subgroup_size = 64,
+                       .subgroup_size = subgroup_size,
                        .ballot_bit_size = 64,
                        .lower_to_scalar = 1,
                        .lower_subgroup_masks = 1,
index 4e4ba73dced42f96f0e957f6fda6069e55e3a10b..255e4ee277c587e88929002e7a6ded0375ffffe8 100644 (file)
@@ -403,7 +403,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                           gl_shader_stage stage,
                           const VkSpecializationInfo *spec_info,
                           const VkPipelineCreateFlags flags,
-                          const struct radv_pipeline_layout *layout);
+                          const struct radv_pipeline_layout *layout,
+                          unsigned subgroup_size);
 
 void *
 radv_alloc_shader_memory(struct radv_device *device,