radv: hardcode shader WAVE_LIMIT to the maximum value
author Andres Rodriguez <andresx7@gmail.com>
Fri, 20 Oct 2017 22:02:12 +0000 (18:02 -0400)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fri, 20 Oct 2017 23:01:44 +0000 (01:01 +0200)
When WAVE_LIMIT is set, a submission opts in to SPI-based resource
scheduling. Because this mechanism is cooperative, we must ensure that
all submissions have this field set; otherwise they will bypass resource
arbitration.

We always hardcode the field to its maximum value, instead of attempting
to calculate an approximate usage. In testing, there were no benefits to
using anything other than the maximum.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/si_cmd_buffer.c

index de3b3883fc8963bf2a0ea4fdcbcee77bcef12381..ac3dff8e9eb785993105920d53a14f1ddb1d5f35 100644 (file)
@@ -179,7 +179,8 @@ si_emit_compute(struct radv_physical_device *physical_device,
        radeon_emit(cs, 0);
        radeon_emit(cs, 0);
 
-       radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 3);
+       radeon_set_sh_reg_seq(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+                             S_00B854_WAVES_PER_SH(0x3));
        radeon_emit(cs, 0);
        /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1 */
        radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
@@ -432,11 +433,15 @@ si_emit_config(struct radv_physical_device *physical_device,
 
        if (physical_device->rad_info.chip_class >= CIK) {
                if (physical_device->rad_info.chip_class >= GFX9) {
-                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                         S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
                } else {
-                       radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
-                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-                       radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+                                         S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+                       radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+                                         S_00B41C_WAVE_LIMIT(0x3F));
+                       radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+                                         S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
                        /* If this is 0, Bonaire can hang even if GS isn't being used.
                         * Other chips are unaffected. These are suboptimal values,
                         * but we don't use on-chip GS.
@@ -445,7 +450,8 @@ si_emit_config(struct radv_physical_device *physical_device,
                                               S_028A44_ES_VERTS_PER_SUBGRP(64) |
                                               S_028A44_GS_PRIMS_PER_SUBGRP(4));
                }
-               radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
+               radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
+                                 S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
 
                if (physical_device->rad_info.num_good_compute_units /
                    (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
@@ -455,7 +461,8 @@ si_emit_config(struct radv_physical_device *physical_device,
                         *
                         * LATE_ALLOC_VS = 2 is the highest safe number.
                         */
-                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
+                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                                         S_00B118_CU_EN(0xffff) | S_00B118_WAVE_LIMIT(0x3F) );
                        radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
                } else {
                        /* Set LATE_ALLOC_VS == 31. It should be less than
@@ -463,11 +470,13 @@ si_emit_config(struct radv_physical_device *physical_device,
                         * - VS can't execute on CU0.
                         * - If HS writes outputs to LDS, LS can't execute on CU0.
                         */
-                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
+                       radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
+                                         S_00B118_CU_EN(0xfffe) | S_00B118_WAVE_LIMIT(0x3F));
                        radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
                }
 
-               radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
+               radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
+                                 S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
        }
 
        if (physical_device->rad_info.chip_class >= VI) {