radeonsi/gfx10: update the tunable max_esverts_base for NGG
author: Marek Olšák <marek.olsak@amd.com>
Fri, 5 Jul 2019 21:53:47 +0000 (17:53 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Sat, 20 Jul 2019 00:16:19 +0000 (20:16 -0400)
We have to fix the computation so as not to break quads.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index ae73133f51a658285bfbddb2b84714304340cb04..e69bc810b632f69db070fe993aeb36b4c99affc5 100644 (file)
@@ -1265,8 +1265,10 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
                shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
        const enum pipe_shader_type gs_type = gs_sel->type;
        const unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
-
-       const unsigned input_prim = si_get_input_prim(gs_sel);
+       /* TODO: Use QUADS as the worst case because of reuse, but triangles
+        * will always have 1 additional unoccupied vector lane. We could use
+        * that lane if the worst case was TRIANGLES. */
+       const unsigned input_prim = si_get_input_prim(gs_sel, PIPE_PRIM_QUADS);
        const bool use_adjacency = input_prim >= PIPE_PRIM_LINES_ADJACENCY &&
                                   input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
        const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
@@ -1294,7 +1296,7 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
 
        /* All these are per subgroup: */
        bool max_vert_out_per_gs_instance = false;
-       unsigned max_esverts_base = 256;
+       unsigned max_esverts_base = 128;
        unsigned max_gsprims_base = 128; /* default prim group size clamp */
 
        /* Hardware has the following non-natural restrictions on the value
index acdbefbf64f9b51bb8f9e1ec32178b6a879fb8c6..a0fe826253af3f2275bee3e3f7c16606d40794a9 100644 (file)
@@ -599,7 +599,8 @@ void si_shader_selector_key_vs(struct si_context *sctx,
                               struct si_shader_selector *vs,
                               struct si_shader_key *key,
                               struct si_vs_prolog_bits *prolog_key);
-unsigned si_get_input_prim(const struct si_shader_selector *gs);
+unsigned si_get_input_prim(const struct si_shader_selector *gs,
+                          unsigned default_worst_case);
 
 /* si_state_draw.c */
 void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
index 3f6e4828923ad75c028e09023454b7028fa24f37..6701e9a0cf7cef790e0f25fc50d61ea8fe3216d9 100644 (file)
@@ -1064,7 +1064,8 @@ static void si_set_ge_pc_alloc(struct si_screen *sscreen,
                       S_030980_NUM_PC_LINES((culling ? 256 : 128) * sscreen->info.max_se - 1));
 }
 
-unsigned si_get_input_prim(const struct si_shader_selector *gs)
+unsigned si_get_input_prim(const struct si_shader_selector *gs,
+                          unsigned default_worst_case)
 {
        if (gs->type == PIPE_SHADER_GEOMETRY)
                return gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
@@ -1078,7 +1079,7 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
        }
 
        /* TODO: Set this correctly if the primitive type is set in the shader key. */
-       return PIPE_PRIM_TRIANGLES;
+       return default_worst_case;
 }
 
 /**
@@ -1101,7 +1102,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
                gs_info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
        bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid;
        unsigned gs_num_invocations = MAX2(gs_sel->gs_num_invocations, 1);
-       unsigned input_prim = si_get_input_prim(gs_sel);
+       /* Anything above TRIANGLES has the same effect as TRIANGLES here. */
+       unsigned input_prim = si_get_input_prim(gs_sel, PIPE_PRIM_TRIANGLES);
        bool break_wave_at_eoi = false;
        struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)