radeonsi: don't enable VBOs in user SGPRs if compute-based culling can be used
author Marek Olšák <marek.olsak@amd.com>
Thu, 9 Jan 2020 21:41:13 +0000 (16:41 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 13 Jan 2020 20:57:07 +0000 (15:57 -0500)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
src/gallium/drivers/radeonsi/si_compute_prim_discard.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h

index 31c18e098e6df2365e63e436af315e0d84265bc1..b6d92da823322d595df3a8abf17d261d00388ed3 100644 (file)
 /* For emulating the rewind packet on CI. */
 #define FORCE_REWIND_EMULATION         0
 
-void si_initialize_prim_discard_tunables(struct si_context *sctx)
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+                                        bool is_aux_context,
+                                        unsigned *prim_discard_vertex_count_threshold,
+                                        unsigned *index_ring_size_per_ib)
 {
-       sctx->prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
+       *prim_discard_vertex_count_threshold = UINT_MAX; /* disable */
 
-       if (sctx->chip_class == GFX6 || /* SI support is not implemented */
-           !sctx->screen->info.has_gds_ordered_append ||
-           sctx->screen->debug_flags & DBG(NO_PD) ||
-           /* If aux_context == NULL, we are initializing aux_context right now. */
-           !sctx->screen->aux_context)
+       if (sscreen->info.chip_class == GFX6 || /* SI support is not implemented */
+           !sscreen->info.has_gds_ordered_append ||
+           sscreen->debug_flags & DBG(NO_PD) ||
+           is_aux_context)
                return;
 
        /* TODO: enable this after the GDS kernel memory management is fixed */
        bool enable_on_pro_graphics_by_default = false;
 
-       if (sctx->screen->debug_flags & DBG(ALWAYS_PD) ||
-           sctx->screen->debug_flags & DBG(PD) ||
+       if (sscreen->debug_flags & DBG(ALWAYS_PD) ||
+           sscreen->debug_flags & DBG(PD) ||
            (enable_on_pro_graphics_by_default &&
-            sctx->screen->info.is_pro_graphics &&
-            (sctx->family == CHIP_BONAIRE ||
-             sctx->family == CHIP_HAWAII ||
-             sctx->family == CHIP_TONGA ||
-             sctx->family == CHIP_FIJI ||
-             sctx->family == CHIP_POLARIS10 ||
-             sctx->family == CHIP_POLARIS11 ||
-             sctx->family == CHIP_VEGA10 ||
-             sctx->family == CHIP_VEGA20))) {
-               sctx->prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
-
-               if (sctx->screen->debug_flags & DBG(ALWAYS_PD))
-                       sctx->prim_discard_vertex_count_threshold = 0; /* always enable */
+            sscreen->info.is_pro_graphics &&
+            (sscreen->info.family == CHIP_BONAIRE ||
+             sscreen->info.family == CHIP_HAWAII ||
+             sscreen->info.family == CHIP_TONGA ||
+             sscreen->info.family == CHIP_FIJI ||
+             sscreen->info.family == CHIP_POLARIS10 ||
+             sscreen->info.family == CHIP_POLARIS11 ||
+             sscreen->info.family == CHIP_VEGA10 ||
+             sscreen->info.family == CHIP_VEGA20))) {
+               *prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */
+
+               if (sscreen->debug_flags & DBG(ALWAYS_PD))
+                       *prim_discard_vertex_count_threshold = 0; /* always enable */
 
                const uint32_t MB = 1024 * 1024;
                const uint64_t GB = 1024 * 1024 * 1024;
@@ -224,12 +226,12 @@ void si_initialize_prim_discard_tunables(struct si_context *sctx)
                /* The total size is double this per context.
                 * Greater numbers allow bigger gfx IBs.
                 */
-               if (sctx->screen->info.vram_size <= 2 * GB)
-                       sctx->index_ring_size_per_ib = 64 * MB;
-               else if (sctx->screen->info.vram_size <= 4 * GB)
-                       sctx->index_ring_size_per_ib = 128 * MB;
+               if (sscreen->info.vram_size <= 2 * GB)
+                       *index_ring_size_per_ib = 64 * MB;
+               else if (sscreen->info.vram_size <= 4 * GB)
+                       *index_ring_size_per_ib = 128 * MB;
                else
-                       sctx->index_ring_size_per_ib = 256 * MB;
+                       *index_ring_size_per_ib = 256 * MB;
        }
 }
 
index a69f6c07800bde434a52fc0efdb0fee97319803f..f37a4b222bb912ca6fa3f7d98cd1d47cded6964a 100644 (file)
@@ -593,7 +593,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                sctx->queued.named.rasterizer = sctx->discard_rasterizer_state;
 
                si_init_draw_functions(sctx);
-               si_initialize_prim_discard_tunables(sctx);
+
+               /* If aux_context == NULL, we are initializing aux_context right now. */
+               bool is_aux_context = !sscreen->aux_context;
+               si_initialize_prim_discard_tunables(sscreen, is_aux_context,
+                                                   &sctx->prim_discard_vertex_count_threshold,
+                                                   &sctx->index_ring_size_per_ib);
        }
 
        /* Initialize SDMA functions. */
@@ -1092,7 +1097,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
        if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
                si_init_perfcounters(sscreen);
 
-       sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
+       unsigned prim_discard_vertex_count_threshold, tmp;
+       si_initialize_prim_discard_tunables(sscreen, false,
+                                           &prim_discard_vertex_count_threshold,
+                                           &tmp);
+       /* Compute-shader-based culling doesn't support VBOs in user SGPRs. */
+       if (prim_discard_vertex_count_threshold == UINT_MAX)
+               sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1;
 
        /* Determine tessellation ring info. */
        bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
index 6c92dc1a81e455ac238d5191d1c86b5619cac562..b0287688f9a4ae5cd88308c146a9c2a55d6f8fdc 100644 (file)
@@ -1437,7 +1437,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
                                          unsigned base_vertex,
                                          uint64_t input_indexbuf_va,
                                          unsigned input_indexbuf_max_elements);
-void si_initialize_prim_discard_tunables(struct si_context *sctx);
+void si_initialize_prim_discard_tunables(struct si_screen *sscreen,
+                                        bool is_aux_context,
+                                        unsigned *prim_discard_vertex_count_threshold,
+                                        unsigned *index_ring_size_per_ib);
 
 /* si_pipe.c */
 void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler);