From 68b3e92fef09330ac880e713a744dc7a57e78f05 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 30 Jul 2020 08:19:48 -0400 Subject: [PATCH] radeonsi: don't count unusable vertices to the NGG LDS size Now we get optimal LDS usage. Fixes: a23802bcb9a - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 0797f9cdb3a..efeb9e8838c 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -2027,9 +2027,15 @@ retry_select_mode: max_gsprims = align(max_gsprims, wavesize); max_gsprims = MIN2(max_gsprims, max_gsprims_base); - if (gsprim_lds_size) + if (gsprim_lds_size) { + /* Don't count unusable vertices to the LDS size. Those are vertices above + * the maximum number of vertices that can occur in the workgroup, + * which is e.g. max_gsprims * 3 for triangles. + */ + unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); max_gsprims = - MIN2(max_gsprims, (max_lds_size - max_esverts * esvert_lds_size) / gsprim_lds_size); + MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size); + } clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency); assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims); @@ -2067,7 +2073,9 @@ retry_select_mode: shader->ngg.prim_amp_factor = prim_amp_factor; shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance; - shader->gs_info.esgs_ring_size = max_esverts * esvert_lds_size; + /* Don't count unusable vertices. */ + shader->gs_info.esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) * + esvert_lds_size; shader->ngg.ngg_emit_size = max_gsprims * gsprim_lds_size; assert(shader->ngg.hw_max_esverts >= min_esverts); /* HW limitation */ -- 2.30.2