From 2857b14bbae5f63653c88bb77dc6c05cff55e804 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 19 Apr 2017 01:53:35 +0200 Subject: [PATCH] radeonsi/gfx9: always compile monolithic ES-GS (asynchronously) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In addition to the non-monolithic variant. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.h | 10 +++++++++- .../drivers/radeonsi/si_state_shaders.c | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 09ddf43f87d..a508ece85b1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -445,12 +445,20 @@ struct si_shader_key { } mono; /* Optimization flags for asynchronous compilation only. */ - union { + struct { struct { uint64_t kill_outputs; /* "get_unique_index" bits */ uint32_t kill_outputs2; /* "get_unique_index2" bits */ unsigned clip_disable:1; } hw_vs; /* HW VS (it can be VS, TES, GS) */ + + /* For shaders where monolithic variants have better code. + * + * This is a flag that has no effect on code generation, + * but forces monolithic shaders to be used as soon as + * possible, because it's in the "opt" group. + */ + unsigned prefer_mono:1; } opt; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b7f848fd077..b2cdcb71944 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1279,6 +1279,25 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, key, &key->part.gs.vs_prolog); key->part.gs.es = sctx->vs_shader.cso; } + + /* Merged ES-GS can have unbalanced wave usage. + * + * ES threads are per-vertex, while GS threads are + * per-primitive. So without any amplification, there + * are fewer GS threads than ES threads, which can result + * in empty (no-op) GS waves. With too much amplification, + * there are more GS threads than ES threads, which + * can result in empty (no-op) ES waves. + * + * Non-monolithic shaders are implemented by setting EXEC + * at the beginning of shader parts, and don't jump to + * the end if EXEC is 0. + * + * Monolithic shaders use conditional blocks, so they can + * jump and skip empty waves of ES or GS. So set this to + * always use optimized variants, which are monolithic. + */ + key->opt.prefer_mono = 1; } key->part.gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix; break; -- 2.30.2