From 60299e9abe8513b36fe7979fbf36a99e4070e8d1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 6 Apr 2018 22:26:49 -0400 Subject: [PATCH] radeonsi: don't emit partial flushes for internal CS flushes only MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Tested-by: Benedikt Schemmer Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_buffer.c | 6 +++--- src/gallium/drivers/radeonsi/si_dma_cs.c | 2 +- src/gallium/drivers/radeonsi/si_fence.c | 5 ++++- src/gallium/drivers/radeonsi/si_gfx_cs.c | 4 ++-- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++-- src/gallium/drivers/radeonsi/si_texture.c | 2 +- src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 12 ++++++++---- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 12 ++++++++---- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 3 ++- 10 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 1420702d8d4..d17b2c6a831 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -64,10 +64,10 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx, sctx->ws->cs_is_buffer_referenced(sctx->gfx_cs, resource->buf, rusage)) { if (usage & PIPE_TRANSFER_DONTBLOCK) { - si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return NULL; } else { - si_flush_gfx_cs(sctx, 0, NULL); + si_flush_gfx_cs(sctx, RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); busy = true; } } @@ -725,7 +725,7 @@ static bool si_resource_commit(struct pipe_context *pctx, if (radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) && ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, res->buf, RADEON_USAGE_READWRITE)) { - si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } 
if (radeon_emitted(ctx->dma_cs, 0) && ctx->ws->cs_is_buffer_referenced(ctx->dma_cs, diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index 7af7c5623b7..1eefaeb6ad5 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -58,7 +58,7 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw, (src && ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, src->buf, RADEON_USAGE_WRITE)))) - si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); /* Flush if there's not enough space, or if the memory usage per IB * is too large. diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 26d6c43b34d..19fcb96041f 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -374,7 +374,10 @@ static boolean si_fence_finish(struct pipe_screen *screen, * not going to wait. */ threaded_context_unwrap_sync(ctx); - si_flush_gfx_cs(sctx, timeout ? 0 : PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(sctx, + (timeout ? 
0 : PIPE_FLUSH_ASYNC) | + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, + NULL); rfence->gfx_unflushed.ctx = NULL; if (!timeout) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 147433b69b6..ddfdb497364 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -47,7 +47,7 @@ void si_need_gfx_cs_space(struct si_context *ctx) ctx->vram, ctx->gtt))) { ctx->gtt = 0; ctx->vram = 0; - si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return; } ctx->gtt = 0; @@ -61,7 +61,7 @@ void si_need_gfx_cs_space(struct si_context *ctx) */ unsigned need_dwords = 2048 + ctx->num_cs_dw_queries_suspend; if (!ctx->ws->cs_check_space(cs, need_dwords)) - si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 125b3a72bfb..351c9f4cd38 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1340,7 +1340,7 @@ radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs, sctx->vram + rbo->vram_usage, sctx->gtt + rbo->gart_usage)) - si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, rbo, usage, priority); } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 7e1660415f5..67ab75bbd2d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2774,7 +2774,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) /* Flush the context to re-emit both init_config states. 
*/ sctx->initial_gfx_cs_size = 0; /* force flush */ - si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); /* Set ring bindings. */ if (sctx->esgs_ring) { @@ -3051,7 +3051,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) */ si_pm4_upload_indirect_buffer(sctx, sctx->init_config); sctx->initial_gfx_cs_size = 0; /* force flush */ - si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } /** diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 17957f18a5f..b41a0d1b925 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1869,7 +1869,7 @@ static void si_texture_transfer_unmap(struct pipe_context *ctx, * The result is that the kernel memory manager is never a bottleneck. */ if (sctx->num_alloc_tex_transfer_bytes > sctx->screen->info.gart_size / 4) { - si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL); + si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); sctx->num_alloc_tex_transfer_bytes = 0; } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 22b5a73143d..9b6d6e83032 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -239,7 +239,8 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, * Only check whether the buffer is being used for write. 
*/ if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, RADEON_USAGE_WRITE)) { - cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return NULL; } @@ -249,7 +250,8 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, } } else { if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return NULL; } @@ -272,7 +274,8 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, if (cs) { if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, RADEON_USAGE_WRITE)) { - cs->flush_cs(cs->flush_data, 0, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); } else { /* Try to avoid busy-waiting in amdgpu_bo_wait. */ if (p_atomic_read(&bo->num_active_ioctls)) @@ -286,7 +289,8 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, /* Mapping for write. */ if (cs) { if (amdgpu_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, 0, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); } else { /* Try to avoid busy-waiting in amdgpu_bo_wait. */ if (p_atomic_read(&bo->num_active_ioctls)) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 1617a2fe32e..6652977e586 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -516,7 +516,8 @@ static void *radeon_bo_map(struct pb_buffer *buf, * * Only check whether the buffer is being used for write. 
*/ if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { - cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return NULL; } @@ -526,7 +527,8 @@ static void *radeon_bo_map(struct pb_buffer *buf, } } else { if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return NULL; } @@ -547,7 +549,8 @@ static void *radeon_bo_map(struct pb_buffer *buf, * * Only check whether the buffer is being used for write. */ if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) { - cs->flush_cs(cs->flush_data, 0, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); } radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_WRITE); @@ -555,7 +558,8 @@ static void *radeon_bo_map(struct pb_buffer *buf, /* Mapping for write. */ if (cs) { if (radeon_bo_is_referenced_by_cs(cs, bo)) { - cs->flush_cs(cs->flush_data, 0, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); } else { /* Try to avoid busy-waiting in radeon_bo_wait. */ if (p_atomic_read(&bo->num_active_ioctls)) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index a1975dff8df..9070464bec8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -407,7 +407,8 @@ static bool radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) /* Flush if there are any relocs. Clean up otherwise. */ if (cs->csc->num_relocs) { - cs->flush_cs(cs->flush_data, PIPE_FLUSH_ASYNC, NULL); + cs->flush_cs(cs->flush_data, + RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } else { radeon_cs_context_cleanup(cs->csc); cs->base.used_vram = 0; -- 2.30.2