radeonsi: implement SDMA-based buffer clearing for CIK-VI
author Marek Olšák <marek.olsak@amd.com>
Sat, 24 Dec 2016 21:52:45 +0000 (22:52 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 5 Jan 2017 17:43:23 +0000 (18:43 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/cik_sdma.c

index e0b914c50fd0961d8ce6a1093a8bece21a3c5f18..a3198450923d0f64d45218973ce09654e9635f36 100644 (file)
@@ -533,6 +533,14 @@ bool r600_check_device_reset(struct r600_common_context *rctx)
        return true;
 }
 
+/* dma_clear_buffer implementation that does no DMA work of its own: the
+ * request is forwarded unchanged to ctx->clear_buffer, with the 32-bit
+ * value passed by address together with its size in bytes (4). */
+static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx,
+                                          struct pipe_resource *dst,
+                                          uint64_t offset, uint64_t size,
+                                          unsigned value)
+{
+       const unsigned *pattern = &value;
+       const int pattern_size = 4;
+
+       ctx->clear_buffer(ctx, dst, offset, size, pattern, pattern_size);
+}
+
 bool r600_common_context_init(struct r600_common_context *rctx,
                              struct r600_common_screen *rscreen,
                              unsigned context_flags)
@@ -559,6 +567,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
        rctx->b.memory_barrier = r600_memory_barrier;
        rctx->b.flush = r600_flush_from_st;
        rctx->b.set_debug_callback = r600_set_debug_callback;
+       rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback;
 
        /* evergreen_compute.c has a special codepath for global buffers.
         * Everything else can use the direct path.
index 25d40da931e74d44d267a11c37b5c3617b1ee5a6..da4b63c0b69a812274b3d746174024e7d141a32e 100644 (file)
@@ -626,6 +626,9 @@ struct r600_common_context {
                         unsigned src_level,
                         const struct pipe_box *src_box);
 
+       void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+                                uint64_t offset, uint64_t size, unsigned value);
+
        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                             uint64_t offset, uint64_t size, unsigned value,
                             enum r600_coherency coher);
index b3eaed58fc051fb967c7c1aa3ee38403d359b51d..698f8f6bbb2894747dc38df2aff6483999f973a5 100644 (file)
@@ -82,6 +82,47 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
        r600_dma_emit_wait_idle(&ctx->b);
 }
 
+/* Fill the byte range [offset, offset + size) of a buffer with a 32-bit
+ * value using SDMA CONSTANT_FILL packets.
+ *
+ * When the context has no DMA command stream, or when offset or size is
+ * not a multiple of 4, the request is handed to ctx->clear_buffer instead.
+ */
+static void cik_sdma_clear_buffer(struct pipe_context *ctx,
+                                 struct pipe_resource *dst,
+                                 uint64_t offset,
+                                 uint64_t size,
+                                 unsigned clear_value)
+{
+       struct si_context *sctx = (struct si_context *)ctx;
+       struct r600_resource *rdst = r600_resource(dst);
+       struct radeon_winsys_cs *cs = sctx->b.dma.cs;
+       unsigned num_packets, pkt;
+       uint64_t va;
+
+       /* No SDMA stream or a range that is not dword-aligned: generic path. */
+       if (!cs || (offset % 4) || (size % 4)) {
+               ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
+               return;
+       }
+
+       /* Record the destination range as valid (initialized) so that
+        * transfer_map knows it has to wait for the GPU before mapping it. */
+       util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+
+       va = rdst->gpu_address + offset;
+
+       /* A single packet covers at most CIK_SDMA_COPY_MAX_SIZE (the copy
+        * limit is reused for fills); 5 dwords are emitted per packet. */
+       num_packets = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+       r600_need_dma_space(&sctx->b, num_packets * 5, rdst, NULL);
+
+       for (pkt = 0; pkt < num_packets; pkt++) {
+               unsigned chunk = MIN2(size, CIK_SDMA_COPY_MAX_SIZE);
+
+               radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0,
+                                               0x8000 /* dword mode */));
+               radeon_emit(cs, va);
+               radeon_emit(cs, va >> 32); /* bits 63:32 of the address */
+               radeon_emit(cs, clear_value);
+               radeon_emit(cs, chunk);
+
+               va += chunk;
+               size -= chunk;
+       }
+
+       r600_dma_emit_wait_idle(&sctx->b);
+}
+
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
 {
        width = u_minify(width, level);
@@ -525,4 +566,5 @@ fallback:
 void cik_init_sdma_functions(struct si_context *sctx)
 {
        sctx->b.dma_copy = cik_sdma_copy;
+       sctx->b.dma_clear_buffer = cik_sdma_clear_buffer; /* route dma_clear_buffer through the SDMA fill path */
 }