From b58e5fb6f317be771326f98d498483e45942beaf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 12 Apr 2019 11:12:34 -0400 Subject: [PATCH] radeonsi: use CP DMA for the null const buffer clear on CIK This is a workaround for a thread deadlock whose root cause is unknown. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108879 Fixes: 9b331e462e5021d994859756d46cd2519d9c9c6e Acked-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/si_clear.c | 6 +++--- src/gallium/drivers/radeonsi/si_compute_blit.c | 8 +++++--- src/gallium/drivers/radeonsi/si_pipe.c | 7 +++++-- src/gallium/drivers/radeonsi/si_pipe.h | 3 ++- src/gallium/drivers/radeonsi/si_test_dma.c | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index e1805f2a1c9..ead680b857b 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -263,7 +263,7 @@ void vi_dcc_clear_level(struct si_context *sctx, } si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size, - &clear_value, 4, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META, false); } /* Set the same micro tile mode as the destination of the last MSAA resolve. 
@@ -496,7 +496,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, uint32_t clear_value = 0xCCCCCCCC; si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, - &clear_value, 4, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META, false); fmask_decompress_needed = true; } @@ -524,7 +524,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, uint32_t clear_value = 0; si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, - &clear_value, 4, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META, false); eliminate_needed = true; } diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 1abeac6adb0..fb0d8d2f1b6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -186,7 +186,8 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, uint32_t *clear_value, - uint32_t clear_value_size, enum si_coherency coher) + uint32_t clear_value_size, enum si_coherency coher, + bool force_cpdma) { if (!size) return; @@ -250,7 +251,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, * about buffer placements. 
*/ if (clear_value_size > 4 || - (clear_value_size == 4 && + (!force_cpdma && + clear_value_size == 4 && offset % 4 == 0 && (size > 32*1024 || sctx->chip_class <= VI))) { si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0, @@ -284,7 +286,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx, int clear_value_size) { si_clear_buffer((struct si_context*)ctx, dst, offset, size, (uint32_t*)clear_value, - clear_value_size, SI_COHERENCY_SHADER); + clear_value_size, SI_COHERENCY_SHADER, false); } void si_copy_buffer(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5caeb575623..fa96ce34224 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -637,11 +637,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_begin_new_gfx_cs(sctx); if (sctx->chip_class == CIK) { - /* Clear the NULL constant buffer, because loads should return zeros. */ + /* Clear the NULL constant buffer, because loads should return zeros. + * Note that this forces CP DMA to be used, because clover deadlocks + * for some reason when the compute codepath is used. 
+ */ uint32_t clear_value = 0; si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, - &clear_value, 4, SI_COHERENCY_SHADER); + &clear_value, 4, SI_COHERENCY_SHADER, true); } return &sctx->b; fail: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 301d38649bf..aaa95f32d20 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1189,7 +1189,8 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, enum si_cache_policy cache_policy); void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, uint32_t *clear_value, - uint32_t clear_value_size, enum si_coherency coher); + uint32_t clear_value_size, enum si_coherency coher, + bool force_cpdma); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size); diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium/drivers/radeonsi/si_test_dma.c index 90a2032cd80..7e396e671be 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma.c +++ b/src/gallium/drivers/radeonsi/si_test_dma.c @@ -309,7 +309,7 @@ void si_test_dma(struct si_screen *sscreen) /* clear dst pixels */ uint32_t zero = 0; si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, - SI_COHERENCY_SHADER); + SI_COHERENCY_SHADER, false); memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size); /* preparation */ -- 2.30.2