radeonsi/gfx9: define and set LS-HS user SGPRs
author Marek Olšák <marek.olsak@amd.com>
Wed, 15 Feb 2017 21:47:57 +0000 (22:47 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index bd73fcc3f9fc6108a479084b1660b3fd04cb2207..f04ed8794f799f29b7853e058865aa0b27a4350b 100644 (file)
@@ -1986,7 +1986,11 @@ void si_init_all_descriptors(struct si_context *sctx)
        int i;
        unsigned ce_offset = 0;
 
+       STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
+
        for (i = 0; i < SI_NUM_SHADERS; i++) {
+               bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
+                               i == PIPE_SHADER_TESS_CTRL;
                /* GFX9 has only 4KB of CE, while previous chips had 32KB.
                 * Rarely used descriptors don't use CE RAM.
                 */
@@ -1999,22 +2003,30 @@ void si_init_all_descriptors(struct si_context *sctx)
 
                si_init_buffer_resources(&sctx->const_buffers[i],
                                         si_const_buffer_descriptors(sctx, i),
-                                        SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
+                                        SI_NUM_CONST_BUFFERS,
+                                        gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
+                                                   SI_SGPR_CONST_BUFFERS,
                                         RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
                                         &ce_offset);
                si_init_buffer_resources(&sctx->shader_buffers[i],
                                         si_shader_buffer_descriptors(sctx, i),
-                                        SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+                                        SI_NUM_SHADER_BUFFERS,
+                                        gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
+                                                   SI_SGPR_SHADER_BUFFERS,
                                         RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
                                         shaderbufs_use_ce ? &ce_offset : NULL);
 
                si_init_descriptors(si_sampler_descriptors(sctx, i),
-                                   SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
+                                   gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
+                                              SI_SGPR_SAMPLERS,
+                                   16, SI_NUM_SAMPLERS,
                                    null_texture_descriptor,
                                    samplers_use_ce ? &ce_offset : NULL);
 
                si_init_descriptors(si_image_descriptors(sctx, i),
-                                   SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
+                                   gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
+                                              SI_SGPR_IMAGES,
+                                   8, SI_NUM_IMAGES,
                                    null_image_descriptor,
                                    images_use_ce ? &ce_offset : NULL);
        }
index b7d3514def2f94db281bf16bcafcfa1ab425eaca..ed5327cbe9bfc89bcf1e74ce193991da777b478a 100644 (file)
@@ -2648,18 +2648,18 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        tf_soffset = LLVMGetParam(ctx->main_fn,
                                  SI_PARAM_TESS_FACTOR_OFFSET);
        ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
-                                  SI_SGPR_TCS_OFFCHIP_LAYOUT, "");
+                                  GFX6_SGPR_TCS_OFFCHIP_LAYOUT, "");
        ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
-                                  SI_TCS_NUM_USER_SGPR, "");
+                                  GFX6_TCS_NUM_USER_SGPR, "");
        ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
-                                  SI_TCS_NUM_USER_SGPR + 1, "");
+                                  GFX6_TCS_NUM_USER_SGPR + 1, "");
 
        /* VGPRs */
        rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
        invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
        tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
 
-       vgpr = SI_TCS_NUM_USER_SGPR + 2;
+       vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
        ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
        ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
        ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
@@ -5715,7 +5715,7 @@ static void create_function(struct si_shader_context *ctx)
                /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
                 * placed after the user SGPRs.
                 */
-               for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++)
+               for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
                        returns[num_returns++] = ctx->i32; /* SGPRs */
 
                for (i = 0; i < 3; i++)
index 90d8a18e17484014d57a9ff81a1842af41452bae..df61a418d0f8322bca919de02bc3bac4ee96231e 100644 (file)
@@ -101,15 +101,31 @@ enum {
        SI_SGPR_VS_STATE_BITS,
        SI_VS_NUM_USER_SGPR,
 
-       /* both TCS and TES */
-       SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
+       /* TES */
+       SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
        SI_TES_NUM_USER_SGPR,
 
-       /* TCS only */
-       SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR,
-       SI_SGPR_TCS_OUT_LAYOUT,
-       SI_SGPR_TCS_IN_LAYOUT,
-       SI_TCS_NUM_USER_SGPR,
+       /* GFX6-8: TCS only */
+       GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
+       GFX6_SGPR_TCS_OUT_OFFSETS,
+       GFX6_SGPR_TCS_OUT_LAYOUT,
+       GFX6_SGPR_TCS_IN_LAYOUT,
+       GFX6_TCS_NUM_USER_SGPR,
+
+       /* GFX9: Merged LS-HS (VS-TCS) only. */
+       GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
+       GFX9_SGPR_TCS_OUT_OFFSETS,
+       GFX9_SGPR_TCS_OUT_LAYOUT,
+       GFX9_SGPR_unused_to_align_the_next_pointer,
+       GFX9_SGPR_TCS_CONST_BUFFERS,
+       GFX9_SGPR_TCS_CONST_BUFFERS_HI,
+       GFX9_SGPR_TCS_SAMPLERS,  /* images & sampler states interleaved */
+       GFX9_SGPR_TCS_SAMPLERS_HI,
+       GFX9_SGPR_TCS_IMAGES,
+       GFX9_SGPR_TCS_IMAGES_HI,
+       GFX9_SGPR_TCS_SHADER_BUFFERS,
+       GFX9_SGPR_TCS_SHADER_BUFFERS_HI,
+       GFX9_TCS_NUM_USER_SGPR,
 
        /* GS limits */
        SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
index 4feadbe3d65219a5f74199e8aab756fe984e289a..de97c0e8f0607789477a5da82bf609cc4bb93c23 100644 (file)
@@ -235,6 +235,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
                                    S_00B42C_LDS_SIZE(lds_size);
 
                radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
+
+               /* Set userdata SGPRs for merged LS-HS. */
+               radeon_set_sh_reg_seq(cs,
+                                     R_00B430_SPI_SHADER_USER_DATA_LS_0 +
+                                     GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3);
+               radeon_emit(cs, offchip_layout);
+               radeon_emit(cs, tcs_out_offsets);
+               radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
        } else {
                unsigned ls_rsrc2 = ls_current->config.rsrc2;
 
@@ -251,7 +259,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
                /* Set userdata SGPRs for TCS. */
                radeon_set_sh_reg_seq(cs,
-                       R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
+                       R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4);
                radeon_emit(cs, offchip_layout);
                radeon_emit(cs, tcs_out_offsets);
                radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
@@ -259,7 +267,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
        }
 
        /* Set userdata SGPRs for TES. */
-       radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1);
+       radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, 1);
        radeon_emit(cs, offchip_layout);
 
        ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) |
index 9e56a45ab77c666b538c28cfe0488657e5fd195e..b04ad920a905ba372d193dac05fcd0b24ee2dd55 100644 (file)
@@ -492,14 +492,15 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
                ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
 
                shader->config.rsrc2 =
-                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                       S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
+                       S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) |
                        S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
        } else {
                si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
                si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
 
                shader->config.rsrc2 =
-                       S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
+                       S_00B42C_USER_SGPR(GFX6_TCS_NUM_USER_SGPR) |
                        S_00B42C_OC_LDS_EN(1) |
                        S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
        }