From a54bcb9429666fcbe38c04660cc4b3f8abbde259 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 3 Sep 2019 17:54:47 -0400 Subject: [PATCH] radeonsi: enable larger SDMA clears and copies on gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/sid.h | 7 ++++--- src/gallium/drivers/radeonsi/si_dma_cs.c | 16 +++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index 1f54f94ae7b..d77d2ff5750 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -298,9 +298,10 @@ #define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1 #define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2 #define CIK_SDMA_PACKET_SRBM_WRITE 0xe -/* There is apparently an undocumented HW "feature" that - prevents the HW from copying past 256 bytes of (1 << 22) */ -#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 +/* There is apparently an undocumented HW limitation that + prevents the HW from copying the last 255 bytes of (1 << 22) - 1 */ +#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 /* almost 4 MB*/ +#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */ enum amd_cmp_class_flags { S_NAN = 1 << 0, // Signaling NaN diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index 195cc6468fc..593bc0bad18 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -103,13 +103,16 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, ui return; } - /* The following code is for Sea Islands and later. */ + /* The following code is for CI and later. */ /* the same maximum size as for copying */ - ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); + unsigned max_size_per_packet = sctx->chip_class >= GFX10_3 ? + GFX103_SDMA_COPY_MAX_SIZE : + CIK_SDMA_COPY_MAX_SIZE; + ncopy = DIV_ROUND_UP(size, max_size_per_packet); si_need_dma_space(sctx, ncopy * 5, sdst, NULL); for (i = 0; i < ncopy; i++) { - csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); + csize = MIN2(size, max_size_per_packet); radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_PACKET_CONSTANT_FILL, 0, 0x8000 /* dword copy */)); radeon_emit(cs, offset); radeon_emit(cs, offset >> 32); @@ -176,8 +179,11 @@ void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, } /* The following code is for CI and later. */ + unsigned max_size_per_packet = sctx->chip_class >= GFX10_3 ? + GFX103_SDMA_COPY_MAX_SIZE : + CIK_SDMA_COPY_MAX_SIZE; unsigned align = ~0u; - ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); + ncopy = DIV_ROUND_UP(size, max_size_per_packet); /* Align copy size to dw if src/dst address are dw aligned */ if ((src_offset & 0x3) == 0 && (dst_offset & 0x3) == 0 && size > 4 && (size & 3) != 0) { @@ -188,7 +194,7 @@ void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, si_need_dma_space(sctx, ncopy * 7, sdst, ssrc); for (i = 0; i < ncopy; i++) { - csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size; + csize = size >= 4 ? MIN2(size & align, max_size_per_packet) : size; radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (sctx->ws->cs_is_secure(cs) ? 1u : 0) << 2)); radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize); -- 2.30.2