From fa476e0566f7324ebd8b0be8055333082eba61ef Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 11 Nov 2016 22:36:17 +0100 Subject: [PATCH] radeonsi: fast exit si_emit_derived_tess_state early MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Tested-by: Edmondo Tommasina Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 25 +++++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8e6a94deade..df2f1303c2d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -336,6 +336,7 @@ struct si_context { struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; + unsigned last_num_patches; /* Debug state. */ bool is_debug; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index affc156a3d1..e904164fafa 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -108,6 +108,19 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets; unsigned offchip_layout, hardware_lds_size, ls_hs_config; + if (sctx->last_ls == ls->current && + sctx->last_tcs == tcs && + sctx->last_tes_sh_base == tes_sh_base && + sctx->last_num_tcs_input_cp == num_tcs_input_cp) { + *num_patches = sctx->last_num_patches; + return; + } + + sctx->last_ls = ls->current; + sctx->last_tcs = tcs; + sctx->last_tes_sh_base = tes_sh_base; + sctx->last_num_tcs_input_cp = num_tcs_input_cp; + /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ num_tcs_inputs = util_last_bit64(ls->cso->outputs_written); @@ -153,6 +166,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, * specific value is taken from the proprietary driver. */ *num_patches = MIN2(*num_patches, 40); + sctx->last_num_patches = *num_patches; output_patch0_offset = input_patch_size * *num_patches; perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; @@ -168,17 +182,6 @@ static void si_emit_derived_tess_state(struct si_context *sctx, ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); } - if (sctx->last_ls == ls->current && - sctx->last_tcs == tcs && - sctx->last_tes_sh_base == tes_sh_base && - sctx->last_num_tcs_input_cp == num_tcs_input_cp) - return; - - sctx->last_ls = ls->current; - sctx->last_tcs = tcs; - sctx->last_tes_sh_base = tes_sh_base; - sctx->last_num_tcs_input_cp = num_tcs_input_cp; - /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII) -- 2.30.2