radeonsi/gfx10: implement si_shader_vs
author Marek Olšák <marek.olsak@amd.com>
Mon, 24 Jun 2019 21:39:39 +0000 (17:39 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:13 +0000 (15:51 -0400)
Only used with tessellation + GS instancing.

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 1dec250e6d3dc020cf831572bcd4c9ca7e663e67..9a4a416513b5b40abd8384e63f5cb3dcdeafb2ff 100644 (file)
@@ -3656,7 +3656,8 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
                }
        }
 
-       if (ctx->shader->selector->so.num_outputs)
+       if (ctx->ac.chip_class <= GFX9 &&
+           ctx->shader->selector->so.num_outputs)
                si_llvm_emit_streamout(ctx, outputs, i, 0);
 
        /* Export PrimitiveID. */
@@ -4448,7 +4449,8 @@ static void declare_streamout_params(struct si_shader_context *ctx,
                                     struct pipe_stream_output_info *so,
                                     struct si_function_info *fninfo)
 {
-       int i;
+       if (ctx->ac.chip_class >= GFX10)
+               return;
 
        /* Streamout SGPRs. */
        if (so->num_outputs) {
@@ -4460,7 +4462,7 @@ static void declare_streamout_params(struct si_shader_context *ctx,
                ctx->param_streamout_write_index = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
        }
        /* A streamout buffer offset is loaded if the stride is non-zero. */
-       for (i = 0; i < 4; i++) {
+       for (int i = 0; i < 4; i++) {
                if (!so->stride[i])
                        continue;
 
@@ -5789,7 +5791,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                }
 
                /* Streamout and exports. */
-               if (gs_selector->so.num_outputs) {
+               if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs) {
                        si_llvm_emit_streamout(&ctx, outputs,
                                               gsinfo->num_outputs,
                                               stream);
index c96f7afc1bec38d6339fb341630c9008d6cb8bae..90d332dcebfda60cfece64ad918e7cb616811baf 100644 (file)
@@ -5565,6 +5565,7 @@ static void si_init_config(struct si_context *sctx)
                 */
                si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL,
                               S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+               si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
                si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
                               sscreen->info.pa_sc_tile_steering_override);
 
index 5e1a166f391bb316ed8eb5853d541f29c85ce4b5..cdcd87f12d1b474b9df0fcff8d228a3e433b698f 100644 (file)
@@ -418,7 +418,8 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
 {
        unsigned type = sel->type;
 
-       if (sscreen->info.family < CHIP_POLARIS10)
+       if (sscreen->info.family < CHIP_POLARIS10 ||
+           sscreen->info.chip_class >= GFX10)
                return;
 
        /* VS as VS, or VS as ES: */
@@ -1371,21 +1372,27 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
 
        si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
        si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(va >> 40));
-       si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
-                      S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
-                      S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
-                      S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
-                      S_00B128_DX10_CLAMP(1) |
-                      S_00B128_FLOAT_MODE(shader->config.float_mode));
-       si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
-                      S_00B12C_USER_SGPR(num_user_sgprs) |
-                      S_00B12C_OC_LDS_EN(oc_lds_en) |
-                      S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
-                      S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
-                      S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
-                      S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
-                      S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
-                      S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
+
+       uint32_t rsrc1 = S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
+                        S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
+                        S_00B128_DX10_CLAMP(1) |
+                        S_00B128_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
+                        S_00B128_FLOAT_MODE(shader->config.float_mode);
+       uint32_t rsrc2 = S_00B12C_USER_SGPR(num_user_sgprs) |
+                        S_00B12C_OC_LDS_EN(oc_lds_en) |
+                        S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
+
+       if (sscreen->info.chip_class <= GFX9) {
+               rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
+               rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
+                        S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
+                        S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
+                        S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
+                        S_00B12C_SO_EN(!!shader->selector->so.num_outputs);
+       }
+
+       si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
+       si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2);
 
        if (window_space)
                shader->ctx_reg.vs.pa_cl_vte_cntl =