From 85fff42d085185db0ca05798f9c1056981dc528b Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 16 Dec 2019 16:42:35 -0500 Subject: [PATCH] turnip: compute gmem offsets at renderpass creation time This makes it easier to implement secondary command buffers, since we no longer need to know the render area to set the gmem offsets for input attachments and CmdClearAttachments. Signed-off-by: Jonathan Marek Reviewed-by: Eric Anholt Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 67 ++++++++------------------- src/freedreno/vulkan/tu_meta_clear.c | 2 +- src/freedreno/vulkan/tu_pass.c | 68 ++++++++++++++++++++++++---- src/freedreno/vulkan/tu_private.h | 11 +---- 4 files changed, 82 insertions(+), 66 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 272becfcfc9..34c3ea8a436 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -112,28 +112,10 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other) return VK_SUCCESS; } -static VkResult -tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling, - const struct tu_device *dev) -{ - const uint32_t gmem_size = dev->physical_device->gmem_size; - uint32_t offset = 0; - - for (uint32_t i = 0; i < tiling->buffer_count; i++) { - /* 16KB-aligned */ - offset = align(offset, 0x4000); - - tiling->gmem_offsets[i] = offset; - offset += tiling->tile0.extent.width * tiling->tile0.extent.height * - tiling->buffer_cpp[i]; - } - - return offset <= gmem_size ? 
VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; -} - static void tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling, - const struct tu_device *dev) + const struct tu_device *dev, + uint32_t pixels) { const uint32_t tile_align_w = dev->physical_device->tile_align_w; const uint32_t tile_align_h = dev->physical_device->tile_align_h; @@ -169,7 +151,7 @@ tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling, } /* do not exceed gmem size */ - while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) { + while (tiling->tile0.extent.width * tiling->tile0.extent.height > pixels) { if (tiling->tile0.extent.width > MAX2(tile_align_w, tiling->tile0.extent.height)) { tiling->tile_count.width++; tiling->tile0.extent.width = @@ -390,7 +372,6 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; const uint32_t a = subpass->depth_stencil_attachment.attachment; if (a == VK_ATTACHMENT_UNUSED) { @@ -427,7 +408,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip))); tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size)); tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); - tu_cs_emit(cs, tiling->gmem_offsets[a]); + tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset); tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); @@ -454,7 +435,6 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; unsigned char mrt_comp[MAX_RTS] = { 0 }; unsigned srgb_cntl = 0; @@ -483,8 +463,7 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, tu_cs_emit(cs, 
A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip))); tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layout.layer_size)); tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); - tu_cs_emit( - cs, tiling->gmem_offsets[a]); /* RB_MRT[i].BASE_GMEM */ + tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset); tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1); tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb) | @@ -791,7 +770,7 @@ tu6_emit_load_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a const struct tu_render_pass_attachment *attachment = &cmd->state.pass->attachments[a]; - if (!attachment->needs_gmem) + if (attachment->gmem_offset < 0) return; const uint32_t x1 = tiling->render_area.offset.x; @@ -817,7 +796,7 @@ tu6_emit_load_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a need_load = true; if (need_load) { - tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[a], false); + tu6_emit_blit_info(cmd, cs, iview, attachment->gmem_offset, false); tu6_emit_blit(cmd, cs); } } @@ -827,7 +806,6 @@ tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a, const VkRenderPassBeginInfo *info) { - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; const struct tu_framebuffer *fb = cmd->state.framebuffer; const struct tu_image_view *iview = fb->attachments[a].attachment; const struct tu_render_pass_attachment *attachment = @@ -835,7 +813,7 @@ tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, unsigned clear_mask = 0; /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */ - if (!attachment->needs_gmem) + if (attachment->gmem_offset < 0) return; if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) @@ -860,7 +838,7 @@ tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(clear_mask)); tu_cs_emit_pkt4(cs, 
REG_A6XX_RB_BLIT_BASE_GMEM, 1); - tu_cs_emit(cs, tiling->gmem_offsets[a]); + tu_cs_emit(cs, attachment->gmem_offset); tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); tu_cs_emit(cs, 0); @@ -888,7 +866,7 @@ tu6_emit_store_attachment(struct tu_cmd_buffer *cmd, tu6_emit_blit_info(cmd, cs, cmd->state.framebuffer->attachments[a].attachment, - cmd->state.tiling_config.gmem_offsets[gmem_a], true); + cmd->state.pass->attachments[gmem_a].gmem_offset, true); tu6_emit_blit(cmd, cs); } @@ -916,7 +894,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_blit_scissor(cmd, cs, true); for (uint32_t a = 0; a < pass->attachment_count; ++a) { - if (pass->attachments[a].needs_gmem) + if (pass->attachments[a].gmem_offset >= 0) tu6_emit_store_attachment(cmd, cs, a, a); } @@ -1521,19 +1499,11 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd, const VkRect2D *render_area) { const struct tu_device *dev = cmd->device; - const struct tu_render_pass *pass = cmd->state.pass; struct tu_tiling_config *tiling = &cmd->state.tiling_config; tiling->render_area = *render_area; - for (uint32_t a = 0; a < pass->attachment_count; a++) { - if (pass->attachments[a].needs_gmem) - tiling->buffer_cpp[a] = pass->attachments[a].cpp; - else - tiling->buffer_cpp[a] = 0; - } - tiling->buffer_count = pass->attachment_count; - tu_tiling_config_update_tile_layout(tiling, dev); + tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels); tu_tiling_config_update_pipe_layout(tiling, dev); tu_tiling_config_update_pipes(tiling, dev); } @@ -2368,7 +2338,6 @@ tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); const struct tu_render_pass *pass = cmd->state.pass; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; struct tu_cs *cs = &cmd->draw_cs; VkResult result = tu_cs_reserve_space(cmd->device, cs, 1024); @@ -2414,9 +2383,9 @@ tu_CmdNextSubpass(VkCommandBuffer 
commandBuffer, VkSubpassContents contents) uint32_t a = subpass->resolve_attachments[i].attachment; const struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment; - if (a != VK_ATTACHMENT_UNUSED && pass->attachments[a].needs_gmem) { + if (a != VK_ATTACHMENT_UNUSED && pass->attachments[a].gmem_offset >= 0) { tu_finishme("missing GMEM->GMEM resolve, performance will suffer\n"); - tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[a], false); + tu6_emit_blit_info(cmd, cs, iview, pass->attachments[a].gmem_offset, false); tu6_emit_blit(cmd, cs); } } @@ -2581,16 +2550,18 @@ write_tex_const(struct tu_cmd_buffer *cmd, const struct tu_tiling_config *tiling = &cmd->state.tiling_config; uint32_t a = cmd->state.subpass->input_attachments[map->value[i] + array_index].attachment; + const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a]; + + assert(att->gmem_offset >= 0); - assert(cmd->state.pass->attachments[a].needs_gmem); dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); dst[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2); dst[2] &= ~(A6XX_TEX_CONST_2_TYPE__MASK | A6XX_TEX_CONST_2_PITCH__MASK); dst[2] |= A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) | - A6XX_TEX_CONST_2_PITCH(tiling->tile0.extent.width * tiling->buffer_cpp[a]); + A6XX_TEX_CONST_2_PITCH(tiling->tile0.extent.width * att->cpp); dst[3] = 0; - dst[4] = 0x100000 + tiling->gmem_offsets[a]; + dst[4] = 0x100000 + att->gmem_offset; dst[5] = A6XX_TEX_CONST_5_DEPTH(1); for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++) dst[i] = 0; diff --git a/src/freedreno/vulkan/tu_meta_clear.c b/src/freedreno/vulkan/tu_meta_clear.c index a44f1426570..4b7e116949b 100644 --- a/src/freedreno/vulkan/tu_meta_clear.c +++ b/src/freedreno/vulkan/tu_meta_clear.c @@ -152,7 +152,7 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer, tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(clear_mask)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - 
tu_cs_emit(cs, cmd->state.tiling_config.gmem_offsets[a]); + tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset); tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); tu_cs_emit(cs, 0); diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c index dcb3ab06e06..f8a24fd85cf 100644 --- a/src/freedreno/vulkan/tu_pass.c +++ b/src/freedreno/vulkan/tu_pass.c @@ -36,6 +36,54 @@ static void update_samples(struct tu_subpass *subpass, subpass->samples = samples; } +#define GMEM_ALIGN 0x4000 + +static void +compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size) +{ + /* calculate total bytes per pixel */ + uint32_t cpp_total = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + struct tu_render_pass_attachment *att = &pass->attachments[i]; + if (att->gmem_offset >= 0) + cpp_total += att->cpp; + } + + /* no gmem attachments */ + if (cpp_total == 0) { + /* any non-zero value so tiling config works with no attachments */ + pass->gmem_pixels = 1024*1024; + return; + } + + /* TODO: this algorithm isn't optimal + * for example, two attachments with cpp = {1, 4} + * result: nblocks = {12, 52}, pixels = 196608 + * optimal: nblocks = {13, 51}, pixels = 208896 + */ + uint32_t block_total = 0, gmem_blocks = gmem_size / GMEM_ALIGN; + uint32_t offset = 0, pixels = ~0u; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + struct tu_render_pass_attachment *att = &pass->attachments[i]; + if (att->gmem_offset < 0) + continue; + + att->gmem_offset = offset; + + /* Note: divide by 16 is for GMEM_ALIGN=16k, tile align w=64/h=16 */ + uint32_t align = MAX2(1, att->cpp / 16); + uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align); + + gmem_blocks -= nblocks; + cpp_total -= att->cpp; + offset += nblocks * GMEM_ALIGN; + pixels = MIN2(pixels, nblocks * GMEM_ALIGN / att->cpp); + } + + pass->gmem_pixels = pixels; + assert(pixels); +} + VkResult tu_CreateRenderPass(VkDevice _device, const VkRenderPassCreateInfo 
*pCreateInfo, @@ -48,7 +96,6 @@ tu_CreateRenderPass(VkDevice _device, size_t attachments_offset; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - assert(pCreateInfo->attachmentCount < MAX_ATTACHMENTS); size = sizeof(*pass); size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); @@ -77,6 +124,7 @@ tu_CreateRenderPass(VkDevice _device, if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE && vk_format_has_stencil(att->format)) att->store_op = VK_ATTACHMENT_STORE_OP_STORE; + att->gmem_offset = -1; } uint32_t subpass_attachment_count = 0; @@ -118,7 +166,7 @@ tu_CreateRenderPass(VkDevice _device, uint32_t a = desc->pInputAttachments[j].attachment; subpass->input_attachments[j].attachment = a; if (a != VK_ATTACHMENT_UNUSED) - pass->attachments[a].needs_gmem = true; + pass->attachments[a].gmem_offset = 0; } } @@ -131,7 +179,7 @@ tu_CreateRenderPass(VkDevice _device, subpass->color_attachments[j].attachment = a; if (a != VK_ATTACHMENT_UNUSED) { - pass->attachments[a].needs_gmem = true; + pass->attachments[a].gmem_offset = 0; update_samples(subpass, pCreateInfo->pAttachments[a].samples); } } @@ -150,7 +198,7 @@ tu_CreateRenderPass(VkDevice _device, desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED; subpass->depth_stencil_attachment.attachment = a; if (a != VK_ATTACHMENT_UNUSED) { - pass->attachments[a].needs_gmem = true; + pass->attachments[a].gmem_offset = 0; update_samples(subpass, pCreateInfo->pAttachments[a].samples); } @@ -159,6 +207,8 @@ tu_CreateRenderPass(VkDevice _device, *pRenderPass = tu_render_pass_to_handle(pass); + compute_gmem_offsets(pass, device->physical_device->gmem_size); + return VK_SUCCESS; } @@ -174,7 +224,6 @@ tu_CreateRenderPass2KHR(VkDevice _device, size_t attachments_offset; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); - assert(pCreateInfo->attachmentCount < MAX_ATTACHMENTS); size = sizeof(*pass); size += pCreateInfo->subpassCount * 
sizeof(pass->subpasses[0]); @@ -204,6 +253,7 @@ tu_CreateRenderPass2KHR(VkDevice _device, if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE && vk_format_has_stencil(att->format)) att->store_op = VK_ATTACHMENT_STORE_OP_STORE; + att->gmem_offset = -1; } uint32_t subpass_attachment_count = 0; struct tu_subpass_attachment *p; @@ -244,7 +294,7 @@ tu_CreateRenderPass2KHR(VkDevice _device, uint32_t a = desc->pInputAttachments[j].attachment; subpass->input_attachments[j].attachment = a; if (a != VK_ATTACHMENT_UNUSED) - pass->attachments[a].needs_gmem = true; + pass->attachments[a].gmem_offset = 0; } } @@ -257,7 +307,7 @@ tu_CreateRenderPass2KHR(VkDevice _device, subpass->color_attachments[j].attachment = a; if (a != VK_ATTACHMENT_UNUSED) { - pass->attachments[a].needs_gmem = true; + pass->attachments[a].gmem_offset = 0; update_samples(subpass, pCreateInfo->pAttachments[a].samples); } } @@ -277,7 +327,7 @@ tu_CreateRenderPass2KHR(VkDevice _device, desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED; subpass->depth_stencil_attachment.attachment = a; if (a != VK_ATTACHMENT_UNUSED) { - pass->attachments[a].needs_gmem = true; + pass->attachments[a].gmem_offset = 0; update_samples(subpass, pCreateInfo->pAttachments[a].samples); } @@ -286,6 +336,8 @@ tu_CreateRenderPass2KHR(VkDevice _device, *pRenderPass = tu_render_pass_to_handle(pass); + compute_gmem_offsets(pass, device->physical_device->gmem_size); + return VK_SUCCESS; } diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 4f78f7163e5..b8bab7cde52 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -96,10 +96,6 @@ typedef uint32_t xcb_window_t; #define MAX_VIEWS 8 /* The Qualcomm driver exposes 0x20000058 */ #define MAX_STORAGE_BUFFER_RANGE 0x20000000 -/* TODO: this isn't a hardware limit, but for a high # of attachments - * we are missing logic to avoid having them all in GMEM at the same time - */ -#define 
MAX_ATTACHMENTS 64 #define NUM_DEPTH_CLEAR_PIPELINES 3 @@ -800,16 +796,12 @@ struct tu_tile struct tu_tiling_config { VkRect2D render_area; - uint32_t buffer_cpp[MAX_ATTACHMENTS]; - uint32_t buffer_count; /* position and size of the first tile */ VkRect2D tile0; /* number of tiles */ VkExtent2D tile_count; - uint32_t gmem_offsets[MAX_ATTACHMENTS]; - /* size of the first VSC pipe */ VkExtent2D pipe0; /* number of VSC pipes */ @@ -1501,13 +1493,14 @@ struct tu_render_pass_attachment VkAttachmentLoadOp stencil_load_op; VkAttachmentStoreOp store_op; VkAttachmentStoreOp stencil_store_op; - bool needs_gmem; + int32_t gmem_offset; }; struct tu_render_pass { uint32_t attachment_count; uint32_t subpass_count; + uint32_t gmem_pixels; struct tu_subpass_attachment *subpass_attachments; struct tu_render_pass_attachment *attachments; struct tu_subpass subpasses[0]; -- 2.30.2