From 37db9d28650c21d2091a654b7c6a636927ef584d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Jul 2019 22:12:36 -0400 Subject: [PATCH] radeonsi/gfx10: fix unnecessary LDS overallocation for NGG GS Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 8 +------- src/gallium/drivers/radeonsi/si_shader.c | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index e69bc810b63..de0299740f8 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1279,17 +1279,11 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) /* We can't allow using the whole LDS, because GS waves compete with * other shader stages for LDS space. * - * Streamout can increase the ESGS buffer size later on, so be more - * conservative with streamout and use 4K dwords. This may be suboptimal. - * - * Otherwise, use the limit of 7K dwords. The reason is that we need - * to leave some headroom for the max_esverts increase at the end. - * * TODO: We should really take the shader's internal LDS use into * account. The linker will fail if the size is greater than * 8K dwords. */ - const unsigned max_lds_size = (gs_sel->so.num_outputs ? 4 : 7) * 1024 - 128; + const unsigned max_lds_size = 8 * 1024 - 768; const unsigned target_lds_size = max_lds_size; unsigned esvert_lds_size = 0; unsigned gsprim_lds_size = 0; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f217abd1501..8f392d640fe 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5242,7 +5242,7 @@ static bool si_shader_binary_open(struct si_screen *screen, } if (sel && shader->key.as_ngg) { - if (sel->so.num_outputs) { + if (sel->type != PIPE_SHADER_GEOMETRY && sel->so.num_outputs) { unsigned esgs_vertex_bytes = 4 * (4 * sel->info.num_outputs + 1); esgs_ring_size = MAX2(esgs_ring_size, shader->ngg.max_out_verts * esgs_vertex_bytes); -- 2.30.2