From 2bb88b2fdc0d9271079e45a009ddbc6bfa6a0f37 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 9 Jan 2020 16:41:13 -0500 Subject: [PATCH] radeonsi: don't enable VBOs in user SGPRs if compute-based culling can be used Reviewed-by: Pierre-Eric Pelloux-Prayer --- .../radeonsi/si_compute_prim_discard.c | 56 ++++++++++--------- src/gallium/drivers/radeonsi/si_pipe.c | 15 ++++- src/gallium/drivers/radeonsi/si_pipe.h | 5 +- 3 files changed, 46 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index 31c18e098e6..b6d92da8233 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -187,36 +187,38 @@ /* For emulating the rewind packet on CI. */ #define FORCE_REWIND_EMULATION 0 -void si_initialize_prim_discard_tunables(struct si_context *sctx) +void si_initialize_prim_discard_tunables(struct si_screen *sscreen, + bool is_aux_context, + unsigned *prim_discard_vertex_count_threshold, + unsigned *index_ring_size_per_ib) { - sctx->prim_discard_vertex_count_threshold = UINT_MAX; /* disable */ + *prim_discard_vertex_count_threshold = UINT_MAX; /* disable */ - if (sctx->chip_class == GFX6 || /* SI support is not implemented */ - !sctx->screen->info.has_gds_ordered_append || - sctx->screen->debug_flags & DBG(NO_PD) || - /* If aux_context == NULL, we are initializing aux_context right now. 
*/ - !sctx->screen->aux_context) + if (sscreen->info.chip_class == GFX6 || /* SI support is not implemented */ + !sscreen->info.has_gds_ordered_append || + sscreen->debug_flags & DBG(NO_PD) || + is_aux_context) return; /* TODO: enable this after the GDS kernel memory management is fixed */ bool enable_on_pro_graphics_by_default = false; - if (sctx->screen->debug_flags & DBG(ALWAYS_PD) || - sctx->screen->debug_flags & DBG(PD) || + if (sscreen->debug_flags & DBG(ALWAYS_PD) || + sscreen->debug_flags & DBG(PD) || (enable_on_pro_graphics_by_default && - sctx->screen->info.is_pro_graphics && - (sctx->family == CHIP_BONAIRE || - sctx->family == CHIP_HAWAII || - sctx->family == CHIP_TONGA || - sctx->family == CHIP_FIJI || - sctx->family == CHIP_POLARIS10 || - sctx->family == CHIP_POLARIS11 || - sctx->family == CHIP_VEGA10 || - sctx->family == CHIP_VEGA20))) { - sctx->prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */ - - if (sctx->screen->debug_flags & DBG(ALWAYS_PD)) - sctx->prim_discard_vertex_count_threshold = 0; /* always enable */ + sscreen->info.is_pro_graphics && + (sscreen->info.family == CHIP_BONAIRE || + sscreen->info.family == CHIP_HAWAII || + sscreen->info.family == CHIP_TONGA || + sscreen->info.family == CHIP_FIJI || + sscreen->info.family == CHIP_POLARIS10 || + sscreen->info.family == CHIP_POLARIS11 || + sscreen->info.family == CHIP_VEGA10 || + sscreen->info.family == CHIP_VEGA20))) { + *prim_discard_vertex_count_threshold = 6000 * 3; /* 6K triangles */ + + if (sscreen->debug_flags & DBG(ALWAYS_PD)) + *prim_discard_vertex_count_threshold = 0; /* always enable */ const uint32_t MB = 1024 * 1024; const uint64_t GB = 1024 * 1024 * 1024; @@ -224,12 +226,12 @@ void si_initialize_prim_discard_tunables(struct si_context *sctx) /* The total size is double this per context. * Greater numbers allow bigger gfx IBs. 
*/ - if (sctx->screen->info.vram_size <= 2 * GB) - sctx->index_ring_size_per_ib = 64 * MB; - else if (sctx->screen->info.vram_size <= 4 * GB) - sctx->index_ring_size_per_ib = 128 * MB; + if (sscreen->info.vram_size <= 2 * GB) + *index_ring_size_per_ib = 64 * MB; + else if (sscreen->info.vram_size <= 4 * GB) + *index_ring_size_per_ib = 128 * MB; else - sctx->index_ring_size_per_ib = 256 * MB; + *index_ring_size_per_ib = 256 * MB; } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index a69f6c07800..f37a4b222bb 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -593,7 +593,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->queued.named.rasterizer = sctx->discard_rasterizer_state; si_init_draw_functions(sctx); - si_initialize_prim_discard_tunables(sctx); + + /* If aux_context == NULL, we are initializing aux_context right now. */ + bool is_aux_context = !sscreen->aux_context; + si_initialize_prim_discard_tunables(sscreen, is_aux_context, + &sctx->prim_discard_vertex_count_threshold, + &sctx->index_ring_size_per_ib); } /* Initialize SDMA functions. */ @@ -1092,7 +1097,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws, if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); - sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1; + unsigned prim_discard_vertex_count_threshold, tmp; + si_initialize_prim_discard_tunables(sscreen, false, + &prim_discard_vertex_count_threshold, + &tmp); + /* Compute-shader-based culling doesn't support VBOs in user SGPRs, so only enable them when culling is disabled (threshold == UINT_MAX). */ + if (prim_discard_vertex_count_threshold == UINT_MAX) + sscreen->num_vbos_in_user_sgprs = sscreen->info.chip_class >= GFX9 ? 5 : 1; /* Determine tessellation ring info. 
*/ bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 && diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6c92dc1a81e..b0287688f9a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1437,7 +1437,10 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx, unsigned base_vertex, uint64_t input_indexbuf_va, unsigned input_indexbuf_max_elements); -void si_initialize_prim_discard_tunables(struct si_context *sctx); +void si_initialize_prim_discard_tunables(struct si_screen *sscreen, + bool is_aux_context, + unsigned *prim_discard_vertex_count_threshold, + unsigned *index_ring_size_per_ib); /* si_pipe.c */ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler); -- 2.30.2