radeonsi/ngg: try GS multi-cycling mode if default mode failed
author Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tue, 9 Jun 2020 10:24:41 +0000 (12:24 +0200)
committer Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Wed, 10 Jun 2020 07:33:58 +0000 (09:33 +0200)
If gsprim_lds_size is larger than target_lds_size then gfx10_ngg_calculate_subgroup_info
will fail.

This commit adds logic to try the GS multi-cycling mode in this case because it
uses less memory.

This fixes glsl-1.50-gs-max-output when using NGG.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5401>

src/gallium/drivers/radeonsi/gfx10_shader_ngg.c

index 62aa8b4fb6693e02ffd077688d25077df65df77c..4c3176f8705106a82aec19d0e86691f0bd65a837 100644 (file)
@@ -1934,9 +1934,11 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
    max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
 
    if (gs_type == PIPE_SHADER_GEOMETRY) {
+      bool force_multi_cycling = false;
       unsigned max_out_verts_per_gsprim = gs_sel->gs_max_out_vertices * gs_num_invocations;
 
-      if (max_out_verts_per_gsprim <= 256) {
+retry_select_mode:
+      if (max_out_verts_per_gsprim <= 256 && !force_multi_cycling) {
          if (max_out_verts_per_gsprim) {
             max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
          }
@@ -1951,6 +1953,13 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
 
       esvert_lds_size = es_sel->esgs_itemsize / 4;
       gsprim_lds_size = (gs_sel->gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
+
+      if (gsprim_lds_size > target_lds_size && !force_multi_cycling) {
+         if (gs_sel->tess_turns_off_ngg || es_sel->type != PIPE_SHADER_TESS_EVAL) {
+            force_multi_cycling = true;
+            goto retry_select_mode;
+         }
+      }
    } else {
       /* VS and TES. */
       /* LDS size for passing data from ES to GS. */