From 60946c0d60610b03bc297df17ec7a3cca1e5f6e8 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Sat, 30 Apr 2016 01:21:22 +0200
Subject: [PATCH] gallium/radeon: add a heuristic for better (S)DMA performance
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Deucher
Reviewed-by: Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 14 ++++++++++++++
 src/gallium/drivers/radeon/radeon_winsys.h    |  2 ++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c     |  8 ++++++++
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  8 ++++++++
 4 files changed, 32 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index ce8d76963a9..bc7f5f53e5d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -176,6 +176,20 @@ void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
 	/* done at the end of DMA calls, so increment this. */
 	rctx->num_dma_calls++;
 
+	/* IBs using too little memory are limited by the IB submission overhead.
+	 * IBs using too much memory are limited by the kernel/TTM overhead.
+	 * Too long IBs create CPU-GPU pipeline bubbles and add latency.
+	 *
+	 * This heuristic makes sure that DMA requests are executed
+	 * very soon after the call is made and lowers memory usage.
+	 * It improves texture upload performance by keeping the DMA
+	 * engine busy while uploads are being submitted.
+	 */
+	if (rctx->ws->cs_query_memory_usage(rctx->dma.cs) > 64 * 1024 * 1024) {
+		rctx->dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+		return;
+	}
+
 	r600_need_dma_space(rctx, 1, NULL, NULL);
 
 	if (cs->cdw == 0) /* empty queue */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 442a4613e96..e73fa14a17e 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -683,6 +683,8 @@ struct radeon_winsys {
      */
     boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
 
+    uint64_t (*cs_query_memory_usage)(struct radeon_winsys_cs *cs);
+
     /**
      * Return the buffer list.
      *
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 9534b972a46..a5d703347c7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -563,6 +563,13 @@ static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64
    return gtt < ws->info.gart_size * 0.7;
 }
 
+static uint64_t amdgpu_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+
+   return cs->used_vram + cs->used_gart;
+}
+
 static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                           struct radeon_bo_list_item *list)
 {
@@ -828,6 +835,7 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
    ws->base.cs_lookup_buffer = amdgpu_cs_lookup_buffer;
    ws->base.cs_validate = amdgpu_cs_validate;
    ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+   ws->base.cs_query_memory_usage = amdgpu_cs_query_memory_usage;
    ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
    ws->base.cs_flush = amdgpu_cs_flush;
    ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 7a901a175f8..9ac50816f86 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -398,6 +398,13 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui
     return gtt < cs->ws->info.gart_size * 0.7;
 }
 
+static uint64_t radeon_drm_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
+{
+    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+
+    return cs->csc->used_vram + cs->csc->used_gart;
+}
+
 static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                               struct radeon_bo_list_item *list)
 {
@@ -671,6 +678,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
     ws->base.cs_validate = radeon_drm_cs_validate;
     ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
+    ws->base.cs_query_memory_usage = radeon_drm_cs_query_memory_usage;
     ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
-- 
2.30.2