radeonsi: enable CU0 in each SE for LS-HS execution
authorMarek Olšák <marek.olsak@amd.com>
Tue, 28 Jun 2016 11:15:45 +0000 (13:15 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Wed, 29 Jun 2016 14:34:22 +0000 (16:34 +0200)
Offchip-only tessellation allows this.

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_state.c

index b21fa5ca7abdbd32532a29dfe72f6211f9f3e29f..54febce42261a102b50c6fd2740f9ac4decdec05 100644 (file)
@@ -3829,6 +3829,7 @@ static void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
 
        if (sctx->b.chip_class >= CIK) {
+               si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
                si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
                si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
                si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
@@ -3841,7 +3842,6 @@ static void si_init_config(struct si_context *sctx)
                         *
                         * LATE_ALLOC_VS = 2 is the highest safe number.
                         */
-                       si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
                        si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
                        si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
                } else {
@@ -3850,7 +3850,6 @@ static void si_init_config(struct si_context *sctx)
                         * - VS can't execute on CU0.
                         * - If HS writes outputs to LDS, LS can't execute on CU0.
                         */
-                       si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
                        si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
                        si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
                }