radv/gfx10: enable wave32 for compute based on shader's wavesize
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 31 Oct 2019 08:30:47 +0000 (09:30 +0100)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 6 Nov 2019 08:20:30 +0000 (09:20 +0100)
This will allow changing the wave size on demand.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_pipeline.c

index a172767e3dde1dae4c273335a96c75b240e42576..87320e6d8222a95864d6bad319ecb5a5bc7b2361 100644 (file)
@@ -4850,6 +4850,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 
        ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
 
+       if (compute_shader->info.wave_size == 32) {
+               assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+               dispatch_initiator |= S_00B800_CS_W32_EN(1);
+       }
+
        if (info->indirect) {
                uint64_t va = radv_buffer_get_va(info->indirect->bo);
 
index e35c3e43d83b8082b336d6b2a7edfd99c358f8d9..ec59bfb1ea94ee66c66750a0110064a56cd6f10b 100644 (file)
@@ -2486,8 +2486,7 @@ VkResult radv_CreateDevice(
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
 
-       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
-                                    S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32);
+       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
 
        if (device->physical_device->rad_info.chip_class >= GFX7) {
                /* If the KMD allows it (there is a KMD hw register for it),
index ee0a24387882f945c26aa47a64e76e13dee24c72..8932d9fa54dfccd7c327ff39d04a6136c76e3b4a 100644 (file)
@@ -5042,7 +5042,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
                                  compute_shader->info.cs.block_size[1] *
                                  compute_shader->info.cs.block_size[2];
        waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup,
-                                            device->physical_device->cs_wave_size);
+                                            compute_shader->info.wave_size);
 
        if (device->physical_device->rad_info.chip_class >= GFX10 &&
            waves_per_threadgroup == 1)