From 1119fe5c25db7ae6d5bf7480a3277f5ce91097f6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Aug 2018 00:04:11 -0400 Subject: [PATCH] radeonsi: merge SI and CI dma_clear_buffer and remove the callback MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Also use assertions for the requirements that offset and size are multiples of 4 and that size is non-zero. Tested-by: Dieter Nützel --- src/gallium/drivers/radeon/radeon_video.c | 3 +- src/gallium/drivers/radeonsi/cik_sdma.c | 41 ------------- src/gallium/drivers/radeonsi/si_cp_dma.c | 2 +- src/gallium/drivers/radeonsi/si_dma.c | 40 ------------ src/gallium/drivers/radeonsi/si_dma_cs.c | 61 ++++++++++++++++++- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 5 +- .../drivers/radeonsi/si_test_dma_perf.c | 2 +- 8 files changed, 66 insertions(+), 90 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index 749f30c2306..a39ce4cc73e 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -120,8 +120,7 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffe { struct si_context *sctx = (struct si_context*)context; - sctx->dma_clear_buffer(sctx, &buffer->res->b.b, 0, - buffer->res->buf->size, 0); + si_sdma_clear_buffer(sctx, &buffer->res->b.b, 0, buffer->res->buf->size, 0); context->flush(context, NULL, 0); } diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 595f8d49a80..1c2fd0f7b1c 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -67,46 +67,6 @@ static void cik_sdma_copy_buffer(struct si_context *ctx, } } -static void cik_sdma_clear_buffer(struct si_context *sctx, - struct pipe_resource *dst, - uint64_t offset, - uint64_t size, - unsigned clear_value) -{ - struct radeon_cmdbuf *cs = sctx->dma_cs; -
unsigned i, ncopy, csize; - struct r600_resource *rdst = r600_resource(dst); - - if (!cs || offset % 4 != 0 || size % 4 != 0 || - dst->flags & PIPE_RESOURCE_FLAG_SPARSE) { - sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4); - return; - } - - /* Mark the buffer range of destination as valid (initialized), - * so that transfer_map knows it should wait for the GPU when mapping - * that range. */ - util_range_add(&rdst->valid_buffer_range, offset, offset + size); - - offset += rdst->gpu_address; - - /* the same maximum size as for copying */ - ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - si_need_dma_space(sctx, ncopy * 5, rdst, NULL); - - for (i = 0; i < ncopy; i++) { - csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); - radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0, - 0x8000 /* dword copy */)); - radeon_emit(cs, offset); - radeon_emit(cs, offset >> 32); - radeon_emit(cs, clear_value); - radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize); - offset += csize; - size -= csize; - } -} - static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w) { width = u_minify(width, level); @@ -554,5 +514,4 @@ fallback: void cik_init_sdma_functions(struct si_context *sctx) { sctx->dma_copy = cik_sdma_copy; - sctx->dma_clear_buffer = cik_sdma_clear_buffer; } diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 486ae75c77f..598d5ecf0dc 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -301,7 +301,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, * of them are moved to SDMA thanks to this. 
*/ !ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf, RADEON_USAGE_READWRITE))) { - sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value); + si_sdma_clear_buffer(sctx, dst, offset, dma_clear_size, value); offset += dma_clear_size; size -= dma_clear_size; diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index da5bd47b5dd..046d8445ce3 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -77,45 +77,6 @@ static void si_dma_copy_buffer(struct si_context *ctx, } } -static void si_dma_clear_buffer(struct si_context *sctx, - struct pipe_resource *dst, - uint64_t offset, - uint64_t size, - unsigned clear_value) -{ - struct radeon_cmdbuf *cs = sctx->dma_cs; - unsigned i, ncopy, csize; - struct r600_resource *rdst = r600_resource(dst); - - if (!cs || offset % 4 != 0 || size % 4 != 0 || - dst->flags & PIPE_RESOURCE_FLAG_SPARSE) { - sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4); - return; - } - - /* Mark the buffer range of destination as valid (initialized), - * so that transfer_map knows it should wait for the GPU when mapping - * that range. 
*/ - util_range_add(&rdst->valid_buffer_range, offset, offset + size); - - offset += rdst->gpu_address; - - /* the same maximum size as for copying */ - ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); - si_need_dma_space(sctx, ncopy * 4, rdst, NULL); - - for (i = 0; i < ncopy; i++) { - csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); - radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_CONSTANT_FILL, 0, - csize / 4)); - radeon_emit(cs, offset); - radeon_emit(cs, clear_value); - radeon_emit(cs, (offset >> 32) << 16); - offset += csize; - size -= csize; - } -} - static void si_dma_copy_tile(struct si_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -325,5 +286,4 @@ fallback: void si_init_dma_functions(struct si_context *sctx) { sctx->dma_copy = si_dma_copy; - sctx->dma_clear_buffer = si_dma_clear_buffer; } diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index 7db9570af3c..ffa2f5ae69b 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -64,6 +64,65 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, radeon_emit(cs, va >> 32); } +void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned clear_value) +{ + struct radeon_cmdbuf *cs = sctx->dma_cs; + unsigned i, ncopy, csize; + struct r600_resource *rdst = r600_resource(dst); + + assert(offset % 4 == 0); + assert(size); + assert(size % 4 == 0); + + if (!cs || dst->flags & PIPE_RESOURCE_FLAG_SPARSE) { + sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4); + return; + } + + /* Mark the buffer range of destination as valid (initialized), + * so that transfer_map knows it should wait for the GPU when mapping + * that range. 
*/ + util_range_add(&rdst->valid_buffer_range, offset, offset + size); + + offset += rdst->gpu_address; + + if (sctx->chip_class == SI) { + /* the same maximum size as for copying */ + ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); + si_need_dma_space(sctx, ncopy * 4, rdst, NULL); + + for (i = 0; i < ncopy; i++) { + csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); + radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_CONSTANT_FILL, 0, + csize / 4)); + radeon_emit(cs, offset); + radeon_emit(cs, clear_value); + radeon_emit(cs, (offset >> 32) << 16); + offset += csize; + size -= csize; + } + return; + } + + /* The following code is for CI, VI, Vega/Raven, etc. */ + /* the same maximum size as for copying */ + ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); + si_need_dma_space(sctx, ncopy * 5, rdst, NULL); + + for (i = 0; i < ncopy; i++) { + csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); + radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0, + 0x8000 /* dword copy */)); + radeon_emit(cs, offset); + radeon_emit(cs, offset >> 32); + radeon_emit(cs, clear_value); + radeon_emit(cs, sctx->chip_class >= GFX9 ? 
csize - 1 : csize); + offset += csize; + size -= csize; + } +} + void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src) { @@ -170,7 +229,7 @@ void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst struct si_context *ctx = (struct si_context*)sscreen->aux_context; mtx_lock(&sscreen->aux_context_lock); - ctx->dma_clear_buffer(ctx, dst, offset, size, value); + si_sdma_clear_buffer(ctx, dst, offset, size, value); sscreen->aux_context->flush(sscreen->aux_context, NULL, 0); mtx_unlock(&sscreen->aux_context_lock); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index a5088adcf24..8b9159b4860 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -750,7 +750,7 @@ static void si_test_vmfault(struct si_screen *sscreen) puts("VM fault test: CP - done."); } if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) { - sctx->dma_clear_buffer(sctx, buf, 0, 4, 0); + si_sdma_clear_buffer(sctx, buf, 0, 4, 0); ctx->flush(ctx, NULL, 0); puts("VM fault test: SDMA - done."); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index ef4f06f41d5..a6f09b65f74 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1029,9 +1029,6 @@ struct si_context { unsigned src_level, const struct pipe_box *src_box); - void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value); - struct si_tracked_regs tracked_regs; }; @@ -1159,6 +1156,8 @@ void si_init_dma_functions(struct si_context *sctx); /* si_dma_cs.c */ void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, uint64_t offset); +void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned clear_value); void si_need_dma_space(struct si_context 
*ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src); void si_flush_dma_cs(struct si_context *ctx, unsigned flags, diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index be2ad079e1a..f097a642999 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -191,7 +191,7 @@ void si_test_dma_perf(struct si_screen *sscreen) u_box_1d(0, size, &box); sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box); } else { - sctx->dma_clear_buffer(sctx, dst, 0, size, clear_value); + si_sdma_clear_buffer(sctx, dst, 0, size, clear_value); } } else { /* Compute */ -- 2.30.2