radv/gfx10: add Wave32 support for fragment shaders
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 1 Aug 2019 08:43:41 +0000 (10:43 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 2 Aug 2019 07:37:34 +0000 (09:37 +0200)
It can be enabled with RADV_PERFTEST=pswave32.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_debug.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index 6414e882676b2c9a53833f92e8b341b5ee20515f..65dbec6e90d43527825e6d176a4a79d9e029db1b 100644 (file)
@@ -65,6 +65,7 @@ enum {
        RADV_PERFTEST_SHADER_BALLOT  =  0x40,
        RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
        RADV_PERFTEST_CS_WAVE_32     = 0x100,
+       RADV_PERFTEST_PS_WAVE_32     = 0x200,
 };
 
 bool
index 29be192443a9f6927e03963a0fff6ac33cb76a17..b66b15edf7323c93ef04b6fa0cb84ad1808e31f9 100644 (file)
@@ -385,10 +385,15 @@ radv_physical_device_init(struct radv_physical_device *device,
 
        /* Determine the number of threads per wave for all stages. */
        device->cs_wave_size = 64;
+       device->ps_wave_size = 64;
 
        if (device->rad_info.chip_class >= GFX10) {
                if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
                        device->cs_wave_size = 32;
+
+               /* For pixel shaders, wave64 is recommended. */
+               if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
+                       device->ps_wave_size = 32;
        }
 
        radv_physical_device_init_mem_types(device);
@@ -503,6 +508,7 @@ static const struct debug_control radv_perftest_options[] = {
        {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
        {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
        {"cswave32", RADV_PERFTEST_CS_WAVE_32},
+       {"pswave32", RADV_PERFTEST_PS_WAVE_32},
        {NULL, 0}
 };
 
index bb78bcccf0e1ebe409fb6b06a5a7ec24c40e5ff2..bba5849b152cc5de915d055e59f4f6c68305ee92 100644 (file)
@@ -4323,6 +4323,8 @@ radv_nir_shader_wave_size(struct nir_shader *const *shaders, int shader_count,
 {
        if (shaders[0]->info.stage == MESA_SHADER_COMPUTE)
                return options->cs_wave_size;
+       else if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
+               return options->ps_wave_size;
        return 64;
 }
 
index d62066cbee4769d05521d18a54c3f08843d52987..dbfe261c98278e966e5669def1d8fd0a97596924 100644 (file)
@@ -4060,7 +4060,8 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
                               ps->config.spi_ps_input_addr);
 
        radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
-                              S_0286D8_NUM_INTERP(ps->info.fs.num_interp));
+                              S_0286D8_NUM_INTERP(ps->info.fs.num_interp) |
+                              S_0286D8_PS_W32_EN(pipeline->device->physical_device->ps_wave_size == 32));
 
        radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
 
index 143c09811c8fe4c952666cbb8af47b00b4a0ff61..a1347060190021a2988a0934f609d53921b26a4a 100644 (file)
@@ -302,6 +302,7 @@ struct radv_physical_device {
        bool has_dcc_constant_encode;
 
        /* Number of threads per wave. */
+       uint8_t ps_wave_size;
        uint8_t cs_wave_size;
 
        /* This is the drivers on-disk cache used as a fallback as opposed to
index 9c88ab551bb4d3ed16c2deb03a6f0a3af7c79251..48ed86c99b1f425ac3697b0cd4f06e73e4cfae40 100644 (file)
@@ -673,7 +673,8 @@ radv_get_shader_wave_size(const struct radv_physical_device *pdevice,
 {
        if (stage == MESA_SHADER_COMPUTE)
                return pdevice->cs_wave_size;
-
+       else if (stage == MESA_SHADER_FRAGMENT)
+               return pdevice->ps_wave_size;
        return 64;
 }
 
@@ -1142,6 +1143,7 @@ shader_variant_compile(struct radv_device *device,
        options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
        options->address32_hi = device->physical_device->rad_info.address32_hi;
        options->cs_wave_size = device->physical_device->cs_wave_size;
+       options->ps_wave_size = device->physical_device->ps_wave_size;
 
        if (options->supports_spill)
                tm_options |= AC_TM_SUPPORTS_SPILL;
index 92ae2a7259d29b6ec6da8528507568f38c2f09d2..0ef49628b5d38a7003ade088a1e6294c80b902bf 100644 (file)
@@ -129,6 +129,7 @@ struct radv_nir_compiler_options {
        uint32_t tess_offchip_block_dw_size;
        uint32_t address32_hi;
        uint8_t cs_wave_size;
+       uint8_t ps_wave_size;
 };
 
 enum radv_ud_index {