From 64c741ffb7aa0ae40c4302bc065fef0192123c6a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 30 Jul 2020 08:19:48 -0400 Subject: [PATCH] radeonsi: fix applying the NGG minimum vertex count requirement The code applied the restriction too late, which could overflow LDS size, which started happening more often after the minimum vertex count was increased for Sienna. Incorporate the clamping into the previous code for rounding up the counts. Now the LDS size can never overflow, but it may use vector lanes less efficiently (max_gsprims can be decreased more), which will be addressed in the next commit. Fixes: 4ecc39e1aa1 ("radeonsi/gfx10: NGG geometry shader PM4 and upload") Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index ee242da7ed1..0797f9cdb3a 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -2022,6 +2022,8 @@ retry_select_mode: max_esverts = MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size); max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); + /* Hardware restriction: minimum value of max_esverts */ + max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); max_gsprims = align(max_gsprims, wavesize); max_gsprims = MIN2(max_gsprims, max_gsprims_base); @@ -2031,10 +2033,13 @@ retry_select_mode: clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency); assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims); - } - /* Hardware restriction: minimum value of max_esverts */ - max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); + /* Verify the restriction. */ + assert(max_esverts >= min_esverts - 1 + max_verts_per_prim); + } else { + /* Hardware restriction: minimum value of max_esverts */ + max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim); + } unsigned max_out_vertices = max_vert_out_per_gs_instance -- 2.30.2