From 74c1001d13f07538e349c157598f9de83f252c49 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sun, 22 Feb 2015 18:01:18 +0100
Subject: [PATCH] radeonsi: add derived tessellation state
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer
---
 src/gallium/drivers/radeonsi/si_hw_context.c  |   7 +
 src/gallium/drivers/radeonsi/si_pipe.h        |  11 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  | 123 ++++++++++++++++++
 .../drivers/radeonsi/si_state_shaders.c       |   8 +-
 4 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index dc8702e95a6..b4c5b0a6f48 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -165,6 +165,9 @@ void si_begin_new_cs(struct si_context *ctx)
 	r600_postflush_resume_features(&ctx->b);
 
 	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+
+	/* Invalidate various draw states so that they are emitted before
+	 * the first draw call. */
 	si_invalidate_draw_sh_constants(ctx);
 	ctx->last_primitive_restart_en = -1;
 	ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
@@ -174,4 +177,8 @@ void si_begin_new_cs(struct si_context *ctx)
 	ctx->last_rast_prim = -1;
 	ctx->last_sc_line_stipple = ~0;
 	ctx->emit_scratch_reloc = true;
+	ctx->last_ls = NULL;
+	ctx->last_tcs = NULL;
+	ctx->last_tes_sh_base = -1;
+	ctx->last_num_tcs_input_cp = -1;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 08eae960c06..ad3677435e4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -48,7 +48,8 @@
 
 #define SI_MAX_DRAW_CS_DWORDS \
 	(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
-	 /*draw regs:*/ 16 + /*draw packets:*/ 31)
+	 /*draw regs:*/ 16 + /*draw packets:*/ 31 +\
+	 /*derived tess state:*/ 19)
 
 /* Instruction cache. */
 #define SI_CONTEXT_INV_ICACHE		(R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -224,7 +225,7 @@ struct si_context {
 	bool			db_depth_disable_expclear;
 	unsigned		ps_db_shader_control;
 
-	/* Draw state. */
+	/* Emitted draw state. */
 	int			last_base_vertex;
 	int			last_start_instance;
 	int			last_sh_base_reg;
@@ -241,6 +242,12 @@ struct si_context {
 	boolean                 emit_scratch_reloc;
 	unsigned		scratch_waves;
 	unsigned		spi_tmpring_size;
+
+	/* Emitted derived tessellation state. */
+	struct si_shader	*last_ls; /* local shader (VS) */
+	struct si_shader_selector *last_tcs;
+	int			last_num_tcs_input_cp;
+	int			last_tes_sh_base;
 };
 
 /* cik_sdma.c */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b877a7e0e95..fd507b0fa1d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -96,6 +96,125 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
 	return prim_conv[mode];
 }
 
+/**
+ * This calculates the LDS size for tessellation shaders (VS, TCS, TES)
+ * and writes it to LS.LDS_SIZE, which is shared by all 3 shader stages.
+ * The LDS layout and other non-compile-time parameters are then written to
+ * the LS, TCS (HS) and TES userdata SGPRs. Nothing is emitted if the state
+ * matches what was last emitted. *num_patches is always set (1 for now).
+ */
+static void si_emit_derived_tess_state(struct si_context *sctx,
+				       const struct pipe_draw_info *info,
+				       unsigned *num_patches)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct si_shader_selector *ls = sctx->vs_shader;
+	/* Without a TCS, the TES pointer is only used as the sctx->last_tcs
+	 * key. It would be wrong to think that TCS = TES. */
+	struct si_shader_selector *tcs =
+		sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
+	unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
+	unsigned num_tcs_input_cp = info->vertices_per_patch;
+	unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
+	unsigned num_tcs_patch_outputs;
+	unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
+	unsigned input_patch_size, output_patch_size, output_patch0_offset;
+	unsigned perpatch_output_offset, lds_size, ls_rsrc2;
+	unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
+
+	*num_patches = 1; /* TODO: calculate this */
+
+	if (sctx->last_ls == ls->current &&
+	    sctx->last_tcs == tcs &&
+	    sctx->last_tes_sh_base == tes_sh_base &&
+	    sctx->last_num_tcs_input_cp == num_tcs_input_cp)
+		return;
+
+	sctx->last_ls = ls->current;
+	sctx->last_tcs = tcs;
+	sctx->last_tes_sh_base = tes_sh_base;
+	sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
+	/* This calculates how shader inputs and outputs among VS, TCS, and TES
+	 * are laid out in LDS. */
+	num_tcs_inputs = util_last_bit64(ls->outputs_written);
+
+	if (sctx->tcs_shader) {
+		num_tcs_outputs = util_last_bit64(tcs->outputs_written);
+		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
+	} else {
+		/* No TCS. Route varyings from LS to TES. */
+		num_tcs_outputs = num_tcs_inputs;
+		num_tcs_output_cp = num_tcs_input_cp;
+		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+	}
+
+	input_vertex_size = num_tcs_inputs * 16;
+	output_vertex_size = num_tcs_outputs * 16;
+
+	input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+	output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
+	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
+
+	lds_size = output_patch0_offset + output_patch_size * *num_patches;
+	ls_rsrc2 = ls->current->ls_rsrc2;
+
+	if (sctx->b.chip_class >= CIK) {
+		assert(lds_size <= 65536);
+		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+	} else {
+		assert(lds_size <= 32768);
+		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+	}
+
+	/* Due to a hw bug (CIK except Hawaii), RSRC2_LS must be written twice
+	 * with another LS register written in between. */
+	if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+		si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+	si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+	radeon_emit(cs, ls->current->ls_rsrc1);
+	radeon_emit(cs, ls_rsrc2);
+
+	/* Compute userdata SGPRs; the asserts check each packed field fits. */
+	assert(((input_vertex_size / 4) & ~0xff) == 0);
+	assert(((output_vertex_size / 4) & ~0xff) == 0);
+	assert(((input_patch_size / 4) & ~0x1fff) == 0);
+	assert(((output_patch_size / 4) & ~0x1fff) == 0);
+	assert(((output_patch0_offset / 16) & ~0xffff) == 0);
+	assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
+	assert(num_tcs_input_cp <= 32);
+	assert(num_tcs_output_cp <= 32);
+
+	tcs_in_layout = (input_patch_size / 4) |
+			((input_vertex_size / 4) << 13);
+	tcs_out_layout = (output_patch_size / 4) |
+			 ((output_vertex_size / 4) << 13);
+	tcs_out_offsets = (output_patch0_offset / 16) |
+			  ((perpatch_output_offset / 16) << 16);
+
+	/* Set them for LS. */
+	si_write_sh_reg(cs,
+		R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
+		tcs_in_layout);
+
+	/* Set them for TCS. */
+	si_write_sh_reg_seq(cs,
+		R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
+	radeon_emit(cs, tcs_out_offsets);
+	radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+	radeon_emit(cs, tcs_in_layout);
+
+	/* Set them for TES. */
+	si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+	radeon_emit(cs, tcs_out_offsets);
+	radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
+}
+
 static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 					  const struct pipe_draw_info *info)
 {
@@ -208,8 +327,12 @@ static void si_emit_draw_registers(struct si_context *sctx,
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	unsigned prim = si_conv_pipe_prim(info->mode);
 	unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+	unsigned num_patches = 0;
 	unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info);
 
+	if (sctx->tes_shader)
+		si_emit_derived_tess_state(sctx, info, &num_patches);
+
 	/* Draw state. */
 	if (prim != sctx->last_prim ||
 	    ia_multi_vgt_param != sctx->last_multi_vgt_param) {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 76d590b4e6c..074e44a60e0 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -745,11 +745,15 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = state;
+	bool enable_changed = !!sctx->tcs_shader != !!sel;
 
 	if (sctx->tcs_shader == sel)
 		return;
 
 	sctx->tcs_shader = sel;
+
+	if (enable_changed)
+		sctx->last_tcs = NULL; /* invalidate derived tess state */
 }
 
 static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
@@ -765,8 +769,10 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
 	sctx->clip_regs.dirty = true;
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */
 
-	if (enable_changed)
+	if (enable_changed) {
 		si_shader_change_notify(sctx);
+		sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
+	}
 }
 
 static void si_make_dummy_ps(struct si_context *sctx)
-- 
2.30.2