radeonsi/gfx10: fix the wave size for compute-based culling
author Marek Olšák <marek.olsak@amd.com>
Fri, 20 Mar 2020 21:35:56 +0000 (17:35 -0400)
committer Marge Bot <eric+marge@anholt.net>
Sat, 28 Mar 2020 00:58:34 +0000 (00:58 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4269>

src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 41c5bd45036ba8fe737380db76c5321057d4c00d..400f2152243aa41d7f4fad3a6c05eda411f514ec 100644 (file)
@@ -1967,13 +1967,14 @@ static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)
 
 static inline unsigned si_get_wave_size(struct si_screen *sscreen,
                                        enum pipe_shader_type shader_type,
-                                       bool ngg, bool es)
+                                       bool ngg, bool es, bool prim_discard_cs)
 {
        if (shader_type == PIPE_SHADER_COMPUTE)
                return sscreen->compute_wave_size;
        else if (shader_type == PIPE_SHADER_FRAGMENT)
                return sscreen->ps_wave_size;
-       else if ((shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
+       else if ((shader_type == PIPE_SHADER_VERTEX && prim_discard_cs) || /* only Wave64 implemented */
+                (shader_type == PIPE_SHADER_VERTEX && es && !ngg) ||
                 (shader_type == PIPE_SHADER_TESS_EVAL && es && !ngg) ||
                 (shader_type == PIPE_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */
                return 64;
@@ -1984,7 +1985,8 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen,
 static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
 {
        return si_get_wave_size(shader->selector->screen, shader->selector->type,
-                               shader->key.as_ngg, shader->key.as_es);
+                               shader->key.as_ngg, shader->key.as_es,
+                               shader->key.opt.vs_as_prim_discard_cs);
 }
 
 #define PRINT_ERR(fmt, args...) \
index 8c8f90b80e23de98a0526bc4d8c76fe2c93baeb9..f0e60087dbfc5ae510ee79869ad9ba2d207af3a5 100644 (file)
@@ -1636,6 +1636,7 @@ static void si_get_vs_prolog_key(const struct si_shader_info *info,
        key->vs_prolog.as_ls = shader_out->key.as_ls;
        key->vs_prolog.as_es = shader_out->key.as_es;
        key->vs_prolog.as_ngg = shader_out->key.as_ngg;
+       key->vs_prolog.as_prim_discard_cs = shader_out->key.opt.vs_as_prim_discard_cs;
 
        if (ngg_cull_shader) {
                key->vs_prolog.gs_fast_launch_tri_list = !!(shader_out->key.opt.ngg_culling &
@@ -1656,6 +1657,12 @@ static void si_get_vs_prolog_key(const struct si_shader_info *info,
                key->vs_prolog.num_merged_next_stage_vgprs = 5;
        }
 
+       /* Only one of these combinations can be set. as_ngg can be set with as_es. */
+       assert(key->vs_prolog.as_ls +
+              key->vs_prolog.as_ngg +
+              (key->vs_prolog.as_es && !key->vs_prolog.as_ngg) +
+              key->vs_prolog.as_prim_discard_cs <= 1);
+
        /* Enable loading the InstanceID VGPR. */
        uint16_t input_mask = u_bit_consecutive(0, info->num_inputs);
 
@@ -2088,6 +2095,7 @@ si_get_shader_part(struct si_screen *sscreen,
                shader.key.as_ls = key->vs_prolog.as_ls;
                shader.key.as_es = key->vs_prolog.as_es;
                shader.key.as_ngg = key->vs_prolog.as_ngg;
+               shader.key.opt.vs_as_prim_discard_cs = key->vs_prolog.as_prim_discard_cs;
                break;
        case PIPE_SHADER_TESS_CTRL:
                assert(!prolog);
@@ -2110,7 +2118,8 @@ si_get_shader_part(struct si_screen *sscreen,
        struct si_shader_context ctx;
        si_llvm_context_init(&ctx, sscreen, compiler,
                             si_get_wave_size(sscreen, type, shader.key.as_ngg,
-                                             shader.key.as_es));
+                                             shader.key.as_es,
+                                             shader.key.opt.vs_as_prim_discard_cs));
        ctx.shader = &shader;
        ctx.type = type;
 
index 1ee9aba9faf36ebba8e3858a99c683c920eb09de..ef571a5d684aa1a078ad866359f26e922dc84425 100644 (file)
@@ -566,6 +566,7 @@ union si_shader_part_key {
                unsigned        as_ls:1;
                unsigned        as_es:1;
                unsigned        as_ngg:1;
+               unsigned        as_prim_discard_cs:1;
                unsigned        has_ngg_cull_inputs:1; /* from the NGG cull shader */
                unsigned        gs_fast_launch_tri_list:1; /* for NGG culling */
                unsigned        gs_fast_launch_tri_strip:1; /* for NGG culling */
index ac734d0924b05506b7eec9d2fefd4cab604856b2..99ffdd2e980a140d7ea9698579c42bb944235cb8 100644 (file)
@@ -523,7 +523,8 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        shader->is_gs_copy_shader = true;
 
        si_llvm_context_init(&ctx, sscreen, compiler,
-                            si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
+                            si_get_wave_size(sscreen, PIPE_SHADER_VERTEX,
+                                             false, false, false));
        ctx.shader = shader;
        ctx.type = PIPE_SHADER_VERTEX;
 
index ce7e791be3d6ebb8d86830f1e6d9ffb7b585972d..be7cda1d33245373b46a4d79b997db4b7ca93573 100644 (file)
@@ -71,7 +71,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
                shader_variant_flags |= 1 << 0;
        if (sel->nir)
                shader_variant_flags |= 1 << 1;
-       if (si_get_wave_size(sel->screen, sel->type, ngg, es) == 32)
+       if (si_get_wave_size(sel->screen, sel->type, ngg, es, false) == 32)
                shader_variant_flags |= 1 << 2;
        if (sel->type == PIPE_SHADER_FRAGMENT &&
            sel->info.uses_derivatives &&