radeonsi: use CP DMA for the null const buffer clear on CIK
author: Marek Olšák <marek.olsak@amd.com>
Fri, 12 Apr 2019 15:12:34 +0000 (11:12 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 22 Apr 2019 20:05:52 +0000 (16:05 -0400)
This is a workaround for a thread deadlock whose root cause
I have not been able to identify.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108879
Fixes: 9b331e462e5021d994859756d46cd2519d9c9c6e
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/gallium/drivers/radeonsi/si_clear.c
src/gallium/drivers/radeonsi/si_compute_blit.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_test_dma.c

index e1805f2a1c93a9cb3a3293beaef31246ee06c520..ead680b857be03a4bf1ad276cdc7227afc7be73f 100644 (file)
@@ -263,7 +263,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
        }
 
        si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
-                       &clear_value, 4, SI_COHERENCY_CB_META);
+                       &clear_value, 4, SI_COHERENCY_CB_META, false);
 }
 
 /* Set the same micro tile mode as the destination of the last MSAA resolve.
@@ -496,7 +496,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
                                uint32_t clear_value = 0xCCCCCCCC;
                                si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
                                                tex->cmask_offset, tex->surface.cmask_size,
-                                               &clear_value, 4, SI_COHERENCY_CB_META);
+                                               &clear_value, 4, SI_COHERENCY_CB_META, false);
                                fmask_decompress_needed = true;
                        }
 
@@ -524,7 +524,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
                        uint32_t clear_value = 0;
                        si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
                                        tex->cmask_offset, tex->surface.cmask_size,
-                                       &clear_value, 4, SI_COHERENCY_CB_META);
+                                       &clear_value, 4, SI_COHERENCY_CB_META, false);
                        eliminate_needed = true;
                }
 
index 1abeac6adb0cef230b5c4678f8d193241b69a649..fb0d8d2f1b6e62de13681ccee4ccee6ab4a49a00 100644 (file)
@@ -186,7 +186,8 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
 
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                     uint64_t offset, uint64_t size, uint32_t *clear_value,
-                    uint32_t clear_value_size, enum si_coherency coher)
+                    uint32_t clear_value_size, enum si_coherency coher,
+                    bool force_cpdma)
 {
        if (!size)
                return;
@@ -250,7 +251,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                 * about buffer placements.
                 */
                if (clear_value_size > 4 ||
-                   (clear_value_size == 4 &&
+                   (!force_cpdma &&
+                    clear_value_size == 4 &&
                     offset % 4 == 0 &&
                     (size > 32*1024 || sctx->chip_class <= VI))) {
                        si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0,
@@ -284,7 +286,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
                                 int clear_value_size)
 {
        si_clear_buffer((struct si_context*)ctx, dst, offset, size, (uint32_t*)clear_value,
-                       clear_value_size, SI_COHERENCY_SHADER);
+                       clear_value_size, SI_COHERENCY_SHADER, false);
 }
 
 void si_copy_buffer(struct si_context *sctx,
index 5caeb57562362dd687a363c7600b38473c3acf09..fa96ce342243df4dbf5f4f06e06e5e11cab2c90a 100644 (file)
@@ -637,11 +637,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        si_begin_new_gfx_cs(sctx);
 
        if (sctx->chip_class == CIK) {
-               /* Clear the NULL constant buffer, because loads should return zeros. */
+               /* Clear the NULL constant buffer, because loads should return zeros.
+                * Note that this forces CP DMA to be used, because clover deadlocks
+                * for some reason when the compute codepath is used.
+                */
                uint32_t clear_value = 0;
                si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
                                sctx->null_const_buf.buffer->width0,
-                               &clear_value, 4, SI_COHERENCY_SHADER);
+                               &clear_value, 4, SI_COHERENCY_SHADER, true);
        }
        return &sctx->b;
 fail:
index 301d38649bf0e64aeffccf7d33d075bcbc493da8..aaa95f32d20ff404d9693dc754538acecc4b4e42 100644 (file)
@@ -1189,7 +1189,8 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
                            enum si_cache_policy cache_policy);
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                     uint64_t offset, uint64_t size, uint32_t *clear_value,
-                    uint32_t clear_value_size, enum si_coherency coher);
+                    uint32_t clear_value_size, enum si_coherency coher,
+                    bool force_cpdma);
 void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
                    uint64_t dst_offset, uint64_t src_offset, unsigned size);
index 90a2032cd80b1e12f8147f718b9a2536d2564f97..7e396e671be3ae09552502ba5810ae25bbfd5688 100644 (file)
@@ -309,7 +309,7 @@ void si_test_dma(struct si_screen *sscreen)
                /* clear dst pixels */
                uint32_t zero = 0;
                si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4,
-                               SI_COHERENCY_SHADER);
+                               SI_COHERENCY_SHADER, false);
                memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
 
                /* preparation */