From a512da36aed3a1c893b34681dec2dfd51d250ee5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 Apr 2016 19:29:55 +0200 Subject: [PATCH] gallium/radeon: fix (S)DMA read-after-write hazards MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Deucher Reviewed-by: Nicolai Hähnle --- .../drivers/r600/evergreen_hw_context.c | 1 + src/gallium/drivers/r600/evergreen_state.c | 1 + src/gallium/drivers/r600/r600_hw_context.c | 1 + src/gallium/drivers/r600/r600_state.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.c | 24 +++++++++++++++++++ src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/cik_sdma.c | 1 + src/gallium/drivers/radeonsi/si_dma.c | 2 ++ 8 files changed, 32 insertions(+) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index a0f46800403..c2dba8c1996 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -77,6 +77,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx, src_offset += csize << shift; size -= csize; } + r600_dma_emit_wait_idle(&rctx->b); } /* The max number of bytes to copy per packet. */ diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 672ddd4fcbe..acf60c6d8d1 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3470,6 +3470,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, addr += cheight * pitch; y += cheight; } + r600_dma_emit_wait_idle(&rctx->b); } static void evergreen_dma_copy(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 2bc6d3ffce4..fa1028be7af 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -484,4 +484,5 @@ void r600_dma_copy_buffer(struct r600_context *rctx, src_offset += csize << 2; size -= csize; } + r600_dma_emit_wait_idle(&rctx->b); } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 48e3663ebf7..715c6f11145 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2941,6 +2941,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, addr += cheight * pitch; y += cheight; } + r600_dma_emit_wait_idle(&rctx->b); return TRUE; } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index cea6b7bce82..3e15b143700 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -149,6 +149,30 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw) } } +/* This is required to prevent read-after-write hazards. */ +void r600_dma_emit_wait_idle(struct r600_common_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->dma.cs; + + /* done at the end of DMA calls, so increment this. */ + rctx->num_dma_calls++; + + r600_need_dma_space(rctx, 1); + + if (cs->cdw == 0) /* empty queue */ + return; + + /* NOP waits for idle on Evergreen and later. */ + if (rctx->chip_class >= CIK) + radeon_emit(cs, 0x00000000); /* NOP */ + else if (rctx->chip_class >= EVERGREEN) + radeon_emit(cs, 0xf0000000); /* NOP */ + else { + /* TODO: R600-R700 should use the FENCE packet. + * CS checker support is required. */ + } +} + static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) { } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 47b73653e93..03449536b5b 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -598,6 +598,7 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ); const char *r600_get_llvm_processor_name(enum radeon_family family); void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw); +void r600_dma_emit_wait_idle(struct r600_common_context *rctx); /* r600_gpu_load.c */ void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 88a994ee390..5021578871b 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -87,6 +87,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx, dst_offset + size); cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size); + r600_dma_emit_wait_idle(&ctx->b); } static void cik_sdma_copy(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 033eb7b1479..03321f583a5 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -82,6 +82,7 @@ static void si_dma_copy_buffer(struct si_context *ctx, src_offset += csize << shift; size -= csize; } + r600_dma_emit_wait_idle(&ctx->b); } static void si_dma_copy_tile(struct si_context *ctx, @@ -188,6 +189,7 @@ static void si_dma_copy_tile(struct si_context *ctx, addr += cheight * pitch; tiled_y += cheight; } + r600_dma_emit_wait_idle(&ctx->b); } static void si_dma_copy(struct pipe_context *ctx, -- 2.30.2