From 0588146cb096194998181a68d21640f0ea00bf75 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 15 Feb 2017 11:57:47 +0100 Subject: [PATCH] radeonsi/gfx9: set up shader registers for merged LS-HS MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_pipe.h | 3 +- src/gallium/drivers/radeonsi/si_state_draw.c | 31 ++++++++++++---- .../drivers/radeonsi/si_state_shaders.c | 36 +++++++++++++++---- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09788316a49..918aa0f0717 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -362,7 +362,8 @@ struct si_context { struct r600_resource *compute_scratch_buffer; /* Emitted derived tessellation state. */ - struct si_shader *last_ls; /* local shader (VS) */ + /* Local shader (VS), or HS if LS-HS are merged. */ + struct si_shader *last_ls; struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c24d6076a09..4feadbe3d65 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -96,7 +96,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_patches) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - struct si_shader_ctx_state *ls = &sctx->vs_shader; + struct si_shader *ls_current; + struct si_shader_selector *ls; /* The TES pointer will only be used for sctx->last_tcs. * It would be wrong to think that TCS = TES. */ struct si_shader_selector *tcs = @@ -111,7 +112,20 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets; unsigned offchip_layout, hardware_lds_size, ls_hs_config; - if (sctx->last_ls == ls->current && + /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */ + if (sctx->b.chip_class >= GFX9) { + if (sctx->tcs_shader.cso) + ls_current = sctx->tcs_shader.current; + else + ls_current = sctx->fixed_func_tcs_shader.current; + + ls = ls_current->key.part.tcs.ls; + } else { + ls_current = sctx->vs_shader.current; + ls = sctx->vs_shader.cso; + } + + if (sctx->last_ls == ls_current && sctx->last_tcs == tcs && sctx->last_tes_sh_base == tes_sh_base && sctx->last_num_tcs_input_cp == num_tcs_input_cp) { @@ -119,14 +133,14 @@ static void si_emit_derived_tess_state(struct si_context *sctx, return; } - sctx->last_ls = ls->current; + sctx->last_ls = ls_current; sctx->last_tcs = tcs; sctx->last_tes_sh_base = tes_sh_base; sctx->last_num_tcs_input_cp = num_tcs_input_cp; /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ - num_tcs_inputs = util_last_bit64(ls->cso->outputs_written); + num_tcs_inputs = util_last_bit64(ls->outputs_written); if (sctx->tcs_shader.cso) { num_tcs_outputs = util_last_bit64(tcs->outputs_written); @@ -217,9 +231,12 @@ static void si_emit_derived_tess_state(struct si_context *sctx, sctx->current_vs_state |= tcs_in_layout; if (sctx->b.chip_class >= GFX9) { - // TODO + unsigned hs_rsrc2 = ls_current->config.rsrc2 | + S_00B42C_LDS_SIZE(lds_size); + + radeon_set_sh_reg(cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2); } else { - unsigned ls_rsrc2 = ls->current->config.rsrc2; + unsigned ls_rsrc2 = ls_current->config.rsrc2; si_multiwave_lds_size_workaround(sctx->screen, &lds_size); ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); @@ -229,7 +246,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); - radeon_emit(cs, ls->current->config.rsrc1); + radeon_emit(cs, ls_current->config.rsrc1); radeon_emit(cs, ls_rsrc2); /* Set userdata SGPRs for TCS. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 4403903eab9..9e56a45ab77 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -474,6 +474,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; uint64_t va; + unsigned ls_vgpr_comp_cnt = 0; pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -482,17 +483,38 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); - si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); - si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); + if (sscreen->b.chip_class >= GFX9) { + si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); + si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40); + + /* We need at least 2 components for LS. + * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ + ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; + + shader->config.rsrc2 = + S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | + S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); + } else { + si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); + si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); + + shader->config.rsrc2 = + S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | + S_00B42C_OC_LDS_EN(1) | + S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); + } + si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B428_DX10_CLAMP(1) | - S_00B428_FLOAT_MODE(shader->config.float_mode)); - si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, - S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | - S_00B42C_OC_LDS_EN(sscreen->b.chip_class <= VI) | - S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); + S_00B428_FLOAT_MODE(shader->config.float_mode) | + S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt)); + + if (sscreen->b.chip_class <= VI) { + si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, + shader->config.rsrc2); + } } static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) -- 2.30.2