radeonsi/gfx9: set up shader registers for merged LS-HS
author: Marek Olšák <marek.olsak@amd.com>
Wed, 15 Feb 2017 10:57:47 +0000 (11:57 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 09788316a4935f1a431efec052f5e1dc74bbbffb..918aa0f071719304931c1df3a17c530cde173187 100644 (file)
@@ -362,7 +362,8 @@ struct si_context {
        struct r600_resource    *compute_scratch_buffer;
 
        /* Emitted derived tessellation state. */
-       struct si_shader        *last_ls; /* local shader (VS) */
+       /* Local shader (VS), or HS if LS-HS are merged. */
+       struct si_shader        *last_ls;
        struct si_shader_selector *last_tcs;
        int                     last_num_tcs_input_cp;
        int                     last_tes_sh_base;
index c24d6076a096408b23734336bd04d729c770d4cc..4feadbe3d65219a5f74199e8aab756fe984e289a 100644 (file)
@@ -96,7 +96,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                                       unsigned *num_patches)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-       struct si_shader_ctx_state *ls = &sctx->vs_shader;
+       struct si_shader *ls_current;
+       struct si_shader_selector *ls;
        /* The TES pointer will only be used for sctx->last_tcs.
         * It would be wrong to think that TCS = TES. */
        struct si_shader_selector *tcs =
@@ -111,7 +112,20 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
        unsigned offchip_layout, hardware_lds_size, ls_hs_config;
 
-       if (sctx->last_ls == ls->current &&
+       /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
+       if (sctx->b.chip_class >= GFX9) {
+               if (sctx->tcs_shader.cso)
+                       ls_current = sctx->tcs_shader.current;
+               else
+                       ls_current = sctx->fixed_func_tcs_shader.current;
+
+               ls = ls_current->key.part.tcs.ls;
+       } else {
+               ls_current = sctx->vs_shader.current;
+               ls = sctx->vs_shader.cso;
+       }
+
+       if (sctx->last_ls == ls_current &&
            sctx->last_tcs == tcs &&
            sctx->last_tes_sh_base == tes_sh_base &&
            sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
@@ -119,14 +133,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                return;
        }
 
-       sctx->last_ls = ls->current;
+       sctx->last_ls = ls_current;
        sctx->last_tcs = tcs;
        sctx->last_tes_sh_base = tes_sh_base;
        sctx->last_num_tcs_input_cp = num_tcs_input_cp;
 
        /* This calculates how shader inputs and outputs among VS, TCS, and TES
         * are laid out in LDS. */
-       num_tcs_inputs = util_last_bit64(ls->cso->outputs_written);
+       num_tcs_inputs = util_last_bit64(ls->outputs_written);
 
        if (sctx->tcs_shader.cso) {
                num_tcs_outputs = util_last_bit64(tcs->outputs_written);
@@ -217,9 +231,12 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        sctx->current_vs_state |= tcs_in_layout;
 
        if (sctx->b.chip_class >= GFX9) {
-               // TODO
+               unsigned hs_rsrc2 = ls_current->config.rsrc2 |
+                                   S_00B42C_LDS_SIZE(lds_size);
+
+               radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
        } else {
-               unsigned ls_rsrc2 = ls->current->config.rsrc2;
+               unsigned ls_rsrc2 = ls_current->config.rsrc2;
 
                si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
                ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
@@ -229,7 +246,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
                        radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
                radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
-               radeon_emit(cs, ls->current->config.rsrc1);
+               radeon_emit(cs, ls_current->config.rsrc1);
                radeon_emit(cs, ls_rsrc2);
 
                /* Set userdata SGPRs for TCS. */
index 4403903eab991279c9d0607ae1f26c521747c0fb..9e56a45ab77c666b538c28cfe0488657e5fd195e 100644 (file)
@@ -474,6 +474,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
        uint64_t va;
+       unsigned ls_vgpr_comp_cnt = 0;
 
        pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
@@ -482,17 +483,38 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
        va = shader->bo->gpu_address;
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
-       si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
-       si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+       if (sscreen->b.chip_class >= GFX9) {
+               si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
+               si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40);
+
+               /* We need at least 2 components for LS.
+                * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+               ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
+
+               shader->config.rsrc2 =
+                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+       } else {
+               si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
+               si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
+
+               shader->config.rsrc2 =
+                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                       S_00B42C_OC_LDS_EN(1) |
+                       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+       }
+
        si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
                       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
                       S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
                       S_00B428_DX10_CLAMP(1) |
-                      S_00B428_FLOAT_MODE(shader->config.float_mode));
-       si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
-                      S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
-                      S_00B42C_OC_LDS_EN(sscreen->b.chip_class <= VI) |
-                      S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
+                      S_00B428_FLOAT_MODE(shader->config.float_mode) |
+                      S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
+
+       if (sscreen->b.chip_class <= VI) {
+               si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+                              shader->config.rsrc2);
+       }
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)