From: Pierre-Eric Pelloux-Prayer Date: Tue, 15 Oct 2019 13:19:22 +0000 (+0200) Subject: radeonsi: align sdma byte count to dw X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=21b9a6b59019fe232beb8e82fc0eb231e56df268;p=mesa.git radeonsi: align sdma byte count to dw If the src/dst addresses are dword (dw) aligned and the size is > 4 bytes, the byte count is aligned down to a dword as well, and the remaining tail bytes are copied by a separate packet. The PAL implementation works the same way. Reviewed-by: Marek Olšák --- diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index f45903affa3..af905f66c99 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -35,6 +35,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx, { struct radeon_cmdbuf *cs = ctx->dma_cs; unsigned i, ncopy, csize; + unsigned align = ~0u; struct si_resource *sdst = si_resource(dst); struct si_resource *ssrc = si_resource(src); @@ -48,10 +49,20 @@ static void cik_sdma_copy_buffer(struct si_context *ctx, src_offset += ssrc->gpu_address; ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); + + /* Align copy size to dw if src/dst address are dw aligned */ + if ((src_offset & 0x3) == 0 && + (dst_offset & 0x3) == 0 && + size > 4 && + (size & 3) != 0) { + align = ~0x3u; + ncopy++; + } + si_need_dma_space(ctx, ncopy * 7, sdst, ssrc); for (i = 0; i < ncopy; i++) { - csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); + csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size; radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, 0));