From 50d755360098edad94696a2d7cdbee4c578fc83b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 17 Jun 2020 11:59:27 -0400 Subject: [PATCH] radeonsi: add a debug option to enable NGG culling for tessellation Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_pipe.c | 9 ++++++--- src/gallium/drivers/radeonsi/si_pipe.h | 6 ++++-- src/gallium/drivers/radeonsi/si_state_draw.c | 3 ++- src/gallium/drivers/radeonsi/si_state_shaders.c | 5 +++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 27b3e9200a5..46f2ff72492 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -94,7 +94,8 @@ static const struct debug_named_value debug_options[] = { /* 3D engine options: */ {"nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used."}, {"nongg", DBG(NO_NGG), "Disable NGG and use the legacy pipeline."}, - {"nggc", DBG(ALWAYS_NGG_CULLING), "Always use NGG culling even when it can hurt."}, + {"nggc", DBG(ALWAYS_NGG_CULLING_ALL), "Always use NGG culling even when it can hurt."}, + {"nggctess", DBG(ALWAYS_NGG_CULLING_TESS), "Always use NGG culling for tessellation."}, {"nonggc", DBG(NO_NGG_CULLING), "Disable NGG culling."}, {"alwayspd", DBG(ALWAYS_PD), "Always enable the primitive discard compute shader."}, {"pd", DBG(PD), "Enable the primitive discard compute shader for large draw calls."}, @@ -1138,8 +1139,10 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->use_ngg = sscreen->info.chip_class >= GFX10 && sscreen->info.family != CHIP_NAVI14 && !(sscreen->debug_flags & DBG(NO_NGG)); sscreen->use_ngg_culling = sscreen->use_ngg && !(sscreen->debug_flags & DBG(NO_NGG_CULLING)); - sscreen->always_use_ngg_culling = - sscreen->use_ngg_culling && sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING); + sscreen->always_use_ngg_culling_all = + sscreen->use_ngg_culling && sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL); + sscreen->always_use_ngg_culling_tess = + sscreen->use_ngg_culling && sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS); sscreen->use_ngg_streamout = false; /* Only enable primitive binning on APUs by default. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 9ad14cab96a..467538c76cd 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -188,7 +188,8 @@ enum /* 3D engine options: */ DBG_NO_GFX, DBG_NO_NGG, - DBG_ALWAYS_NGG_CULLING, + DBG_ALWAYS_NGG_CULLING_ALL, + DBG_ALWAYS_NGG_CULLING_TESS, DBG_NO_NGG_CULLING, DBG_ALWAYS_PD, DBG_PD, @@ -517,7 +518,8 @@ struct si_screen { bool llvm_has_working_vgpr_indexing; bool use_ngg; bool use_ngg_culling; - bool always_use_ngg_culling; + bool always_use_ngg_culling_all; + bool always_use_ngg_culling_tess; bool use_ngg_streamout; struct { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c7223d94122..b123e610ffe 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1864,7 +1864,8 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Update NGG culling settings. */ if (sctx->ngg && !dispatch_prim_discard_cs && rast_prim == PIPE_PRIM_TRIANGLES && !sctx->gs_shader.cso && /* GS doesn't support NGG culling. */ - (sctx->screen->always_use_ngg_culling || + (sctx->screen->always_use_ngg_culling_all || + (sctx->tes_shader.cso && sctx->screen->always_use_ngg_culling_tess) || /* At least 1024 non-indexed vertices (8 subgroups) are needed * per draw call (no TES/GS) to enable NGG culling. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b4e95b78549..d1a1aa725cd 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2752,9 +2752,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sscreen->info.chip_class >= GFX10 && sscreen->info.has_dedicated_vram && sscreen->use_ngg_culling && - /* Disallow TES by default, because TessMark results are mixed. */ (sel->type == PIPE_SHADER_VERTEX || - (sscreen->always_use_ngg_culling && sel->type == PIPE_SHADER_TESS_EVAL)) && + (sel->type == PIPE_SHADER_TESS_EVAL && + (sscreen->always_use_ngg_culling_all || + sscreen->always_use_ngg_culling_tess))) && sel->info.writes_position && !sel->info.writes_viewport_index && /* cull only against viewport 0 */ !sel->info.writes_memory && !sel->so.num_outputs && -- 2.30.2