From e08463ac22a6e3b47ab8459c5f0f1212704b4800 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 5 Jul 2019 17:53:47 -0400 Subject: [PATCH] radeonsi/gfx10: update a tunable max_es_verts_base for NGG We have to fix the computation so as not to break quads. Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 8 +++++--- src/gallium/drivers/radeonsi/si_state.h | 3 ++- src/gallium/drivers/radeonsi/si_state_shaders.c | 8 +++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index ae73133f51a..e69bc810b63 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1265,8 +1265,10 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel; const enum pipe_shader_type gs_type = gs_sel->type; const unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1); - - const unsigned input_prim = si_get_input_prim(gs_sel); + /* TODO: Use QUADS as the worst case because of reuse, but triangles + * will always have 1 additional unoccupied vector lane. We could use + * that lane if the worst case was TRIANGLES. */ + const unsigned input_prim = si_get_input_prim(gs_sel, PIPE_PRIM_QUADS); const bool use_adjacency = input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY; const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim); @@ -1294,7 +1296,7 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) /* All these are per subgroup: */ bool max_vert_out_per_gs_instance = false; - unsigned max_esverts_base = 256; + unsigned max_esverts_base = 128; unsigned max_gsprims_base = 128; /* default prim group size clamp */ /* Hardware has the following non-natural restrictions on the value diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index acdbefbf64f..a0fe826253a 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -599,7 +599,8 @@ void si_shader_selector_key_vs(struct si_context *sctx, struct si_shader_selector *vs, struct si_shader_key *key, struct si_vs_prolog_bits *prolog_key); -unsigned si_get_input_prim(const struct si_shader_selector *gs); +unsigned si_get_input_prim(const struct si_shader_selector *gs, + unsigned default_worst_case); /* si_state_draw.c */ void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 3f6e4828923..6701e9a0cf7 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1064,7 +1064,8 @@ static void si_set_ge_pc_alloc(struct si_screen *sscreen, S_030980_NUM_PC_LINES((culling ? 256 : 128) * sscreen->info.max_se - 1)); } -unsigned si_get_input_prim(const struct si_shader_selector *gs) +unsigned si_get_input_prim(const struct si_shader_selector *gs, + unsigned default_worst_case) { if (gs->type == PIPE_SHADER_GEOMETRY) return gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; @@ -1078,7 +1079,7 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs) } /* TODO: Set this correctly if the primitive type is set in the shader key. */ - return PIPE_PRIM_TRIANGLES; + return default_worst_case; } /** @@ -1101,7 +1102,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader gs_info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid; unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1); - unsigned input_prim = si_get_input_prim(gs_sel); + /* Anything above TRIANGLES has the same effect as TRIANGLES here. */ + unsigned input_prim = si_get_input_prim(gs_sel, PIPE_PRIM_TRIANGLES); bool break_wave_at_eoi = false; struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader); if (!pm4) -- 2.30.2