From 66165634724abc9c1a0249411667045d44d2830f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 18 Feb 2019 16:13:23 +0100 Subject: [PATCH] turnip: Add image->buffer DMA copies. Passes dEQP-VK.api.copy_and_blit.core.image_to_buffer.* --- src/freedreno/vulkan/tu_meta_copy.c | 230 ++++++++++++++++++++++++++-- 1 file changed, 218 insertions(+), 12 deletions(-) diff --git a/src/freedreno/vulkan/tu_meta_copy.c b/src/freedreno/vulkan/tu_meta_copy.c index 0a80aefb80e..321585cb1ca 100644 --- a/src/freedreno/vulkan/tu_meta_copy.c +++ b/src/freedreno/vulkan/tu_meta_copy.c @@ -40,6 +40,29 @@ blit_control(enum a6xx_color_fmt fmt) return blit_cntl; } +static uint32_t tu6_sp_2d_src_format(VkFormat format) +{ + const struct vk_format_description *desc = vk_format_description(format); + uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb); + + int channel = vk_format_get_first_non_void_channel(format); + if (channel < 0) { + /* TODO special format. */ + return reg; + } + if (desc->channel[channel].normalized) { + if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) + reg |= A6XX_SP_2D_SRC_FORMAT_SINT; + reg |= A6XX_SP_2D_SRC_FORMAT_NORM; + } else if (desc->channel[channel].pure_integer) { + if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) + reg |= A6XX_SP_2D_SRC_FORMAT_SINT; + else + reg |= A6XX_SP_2D_SRC_FORMAT_UINT; + } + return reg; +} + static void tu_dma_prepare(struct tu_cmd_buffer *cmdbuf) { @@ -193,6 +216,197 @@ tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); } +static void +tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf, + struct tu_image *src_image, + struct tu_buffer *dst_buffer, + const VkBufferImageCopy *copy_info, + VkFormat format, + uint32_t layer, + uint64_t dst_va) +{ + const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; + + uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset; + unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch * + vk_format_get_blocksize(format); + + unsigned dst_pitch; + unsigned dst_offset = 0; + if (copy_info->imageExtent.height == 1) { + /* Can't find this in the spec, but not having it is sort of insane? */ + assert(dst_va % vk_format_get_blocksize(format) == 0); + + dst_offset = (dst_va & 63) / vk_format_get_blocksize(format); + dst_va &= ~63; + + dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64); + } else { + unsigned dst_pixel_stride = copy_info->bufferRowLength + ? copy_info->bufferRowLength + : copy_info->imageExtent.width; + dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format); + assert(!(dst_pitch & 63)); + assert(!(dst_va & 63)); + } + + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); + + /* + * Emit source: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | + A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) | + A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); + tu_cs_emit(&cmdbuf->cs, + A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) | + A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( + src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */ + tu_cs_emit_qw(&cmdbuf->cs, src_va); + tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + /* + * Emit destination: + */ + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | + A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); + tu_cs_emit_qw(&cmdbuf->cs, dst_va); + tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + tu_cs_emit(&cmdbuf->cs, 0x00000000); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x + + copy_info->imageExtent.width - 1)); + tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y + + copy_info->imageExtent.height - 1)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0)); + tu_cs_emit(&cmdbuf->cs, + A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) | + A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1)); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); + tu_cs_emit(&cmdbuf->cs, 0x3f); + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); + tu_cs_emit(&cmdbuf->cs, 0); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); + tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0x01000000); + + tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); + tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); + + tu_cs_emit_wfi(&cmdbuf->cs); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(&cmdbuf->cs, 0); +} + +static void +tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf, + struct tu_image *src_image, + struct tu_buffer *dst_buffer, + const VkBufferImageCopy *copy_info) +{ + tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); + + /* general setup */ + tu_dma_prepare(cmdbuf); + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); + + /* buffer copy setup */ + tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); + tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); + + VkFormat format = src_image->vk_format; + const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; + + unsigned dst_pixel_stride = copy_info->bufferRowLength + ? copy_info->bufferRowLength + : copy_info->imageExtent.width; + unsigned cpp = vk_format_get_blocksize(format); + unsigned dst_pitch = dst_pixel_stride * cpp; + + + const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, blit_cntl); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, blit_cntl); + + for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) { + unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset; + uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch; + + if ((dst_pitch & 63) || (dst_va & 63)) { + /* Do a per line copy */ + VkBufferImageCopy line_copy_info = *copy_info; + line_copy_info.imageExtent.height = 1; + for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) { + /* + * if dst_va is not aligned the line copy will need to adjust. Give it + * room to do so. + */ + unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0; + line_copy_info.imageOffset.x = copy_info->imageOffset.x; + line_copy_info.imageExtent.width = copy_info->imageExtent.width; + + for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) { + tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp); + + line_copy_info.imageOffset.x += max_width; + line_copy_info.imageExtent.width -= max_width; + } + + line_copy_info.imageOffset.y++; + dst_va += dst_pitch; + } + } else { + tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va); + } + } + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); + + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); +} + void tu_CmdCopyBuffer(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, @@ -239,16 +453,6 @@ tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, destImageLayout, regionCount, pRegions); } -static void -meta_copy_image_to_buffer(struct tu_cmd_buffer *cmd_buffer, - struct tu_buffer *buffer, - struct tu_image *image, - VkImageLayout layout, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) -{ -} - void tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, VkImage srcImage, @@ -261,8 +465,10 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, TU_FROM_HANDLE(tu_image, src_image, srcImage); TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer); - meta_copy_image_to_buffer(cmd_buffer, dst_buffer, src_image, - srcImageLayout, regionCount, pRegions); + for (unsigned i = 0; i < regionCount; ++i) { + tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer, + pRegions + i); + } } static void -- 2.30.2