From f78c4251f106c41432c012a80585d66836ad8f76 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 16 Dec 2019 09:56:06 -0500 Subject: [PATCH] turnip: use common blit path for buffer copy Signed-off-by: Jonathan Marek Reviewed-by: Eric Anholt --- src/freedreno/vulkan/tu_blit.c | 25 +++- src/freedreno/vulkan/tu_blit.h | 1 + src/freedreno/vulkan/tu_meta_copy.c | 193 ++++------------------------ 3 files changed, 53 insertions(+), 166 deletions(-) diff --git a/src/freedreno/vulkan/tu_blit.c b/src/freedreno/vulkan/tu_blit.c index 22918d62995..3cddfa128ca 100644 --- a/src/freedreno/vulkan/tu_blit.c +++ b/src/freedreno/vulkan/tu_blit.c @@ -285,7 +285,30 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt) tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); for (unsigned layer = 0; layer < blt->layers; layer++) { - if ((blt->src.va & 63) || (blt->src.pitch & 63)) { + if (blt->buffer) { + struct tu_blit line_blt = *blt; + uint64_t dst_va = line_blt.dst.va, src_va = line_blt.src.va; + unsigned blocksize = vk_format_get_blocksize(blt->src.fmt); + uint32_t size = line_blt.src.width, tmp; + + while (size) { + line_blt.src.x = (src_va & 63) / blocksize; + line_blt.src.va = src_va & ~63; + tmp = MIN2(size, 0x4000 - line_blt.src.x); + + line_blt.dst.x = (dst_va & 63) / blocksize; + line_blt.dst.va = dst_va & ~63; + tmp = MIN2(tmp, 0x4000 - line_blt.dst.x); + + line_blt.src.width = line_blt.dst.width = tmp; + + emit_blit_step(cmdbuf, &line_blt); + + src_va += tmp * blocksize; + dst_va += tmp * blocksize; + size -= tmp; + } + } else if ((blt->src.va & 63) || (blt->src.pitch & 63)) { /* per line copy path (buffer_to_image) */ assert(blt->type == TU_BLIT_COPY && !blt->src.tiled); struct tu_blit line_blt = *blt; diff --git a/src/freedreno/vulkan/tu_blit.h b/src/freedreno/vulkan/tu_blit.h index acceb1aa8a3..341598179c8 100644 --- a/src/freedreno/vulkan/tu_blit.h +++ b/src/freedreno/vulkan/tu_blit.h @@ -114,6 +114,7 @@ struct tu_blit { uint32_t layers; bool filter; bool stencil_read; + bool buffer; /* 1d copy/clear */ enum a6xx_rotation rotation; uint32_t clear_value[4]; enum tu_blit_type type; diff --git a/src/freedreno/vulkan/tu_meta_copy.c b/src/freedreno/vulkan/tu_meta_copy.c index d316884eeef..ecded029a7a 100644 --- a/src/freedreno/vulkan/tu_meta_copy.c +++ b/src/freedreno/vulkan/tu_meta_copy.c @@ -32,166 +32,34 @@ #include "tu_cs.h" #include "tu_blit.h" -static uint32_t -blit_control(enum a6xx_color_fmt fmt) -{ - unsigned blit_cntl = 0xf00000; - blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt); - blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt)); - return blit_cntl; -} - -static void -tu_dma_prepare(struct tu_cmd_buffer *cmdbuf) -{ - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 10); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); - tu_cs_emit(&cmdbuf->cs, PC_CCU_INVALIDATE_COLOR); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); - tu_cs_emit(&cmdbuf->cs, LRZ_FLUSH); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - tu_cs_emit(&cmdbuf->cs, 0x0); - - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_CCU_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, 0x10000000); -} - static void -tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, - struct tu_bo *src_bo, - uint64_t src_offset, - struct tu_bo *dst_bo, - uint64_t dst_offset, - uint64_t size) +tu_copy_buffer(struct tu_cmd_buffer *cmd, + struct tu_buffer *src, + struct tu_buffer *dst, + const VkBufferCopy *region) { - const unsigned max_size_per_iter = 0x4000 - 0x40; - const unsigned max_iterations = - (size + max_size_per_iter) / max_size_per_iter; - - tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ); - tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE); - - tu_dma_prepare(cmdbuf); - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations); - - /* buffer copy setup */ - tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); - tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - - const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - for (; size;) { - uint64_t src_va = src_bo->iova + src_offset; - uint64_t dst_va = dst_bo->iova + dst_offset; - - unsigned src_shift = src_va & 0x3f; - unsigned dst_shift = dst_va & 0x3f; - unsigned max_shift = MAX2(src_shift, dst_shift); - - src_va -= src_shift; - dst_va -= dst_shift; - - uint32_t size_todo = MIN2(0x4000 - max_shift, size); - unsigned pitch = (size_todo + max_shift + 63) & ~63; - - /* - * Emit source: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) | - A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) | - A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */ - tu_cs_emit_qw(&cmdbuf->cs, src_va); - tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch)); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - /* - * Emit destination: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); - tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) | - A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); - tu_cs_emit_qw(&cmdbuf->cs, dst_va); - - tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch)); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - /* - * Blit command: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1)); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) | - A6XX_GRAS_2D_DST_BR_Y(0)); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); - tu_cs_emit(&cmdbuf->cs, 0x3f); - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); - tu_cs_emit(&cmdbuf->cs, 0); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); - tu_cs_emit(&cmdbuf->cs, 0xf180); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0x01000000); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); - tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); - - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0); - - src_offset += size_todo; - dst_offset += size_todo; - size -= size_todo; - } - - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); + tu_bo_list_add(&cmd->bo_list, src->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmd->bo_list, dst->bo, MSM_SUBMIT_BO_WRITE); + + tu_blit(cmd, &(struct tu_blit) { + .dst = { + .fmt = VK_FORMAT_R8_UNORM, + .va = tu_buffer_iova(dst) + region->dstOffset, + .width = region->size, + .height = 1, + .samples = 1, + }, + .src = { + .fmt = VK_FORMAT_R8_UNORM, + .va = tu_buffer_iova(src) + region->srcOffset, + .width = region->size, + .height = 1, + .samples = 1, + }, + .layers = 1, + .type = TU_BLIT_COPY, + .buffer = true, + }); } static struct tu_blit_surf @@ -284,13 +152,8 @@ tu_CmdCopyBuffer(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer); - for (unsigned i = 0; i < regionCount; ++i) { - uint64_t src_offset = src_buffer->bo_offset + pRegions[i].srcOffset; - uint64_t dst_offset = dst_buffer->bo_offset + pRegions[i].dstOffset; - - tu_copy_buffer(cmdbuf, src_buffer->bo, src_offset, dst_buffer->bo, - dst_offset, pRegions[i].size); - } + for (unsigned i = 0; i < regionCount; ++i) + tu_copy_buffer(cmdbuf, src_buffer, dst_buffer, &pRegions[i]); } void -- 2.30.2