From: Jonathan Marek Date: Fri, 6 Dec 2019 01:53:34 +0000 (-0500) Subject: turnip: subpass rework X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=aac7d6c1dcab12a820ec8d9c40911dd8212ebce6;p=mesa.git turnip: subpass rework A renderpass is a tile load/store cycle. Signed-off-by: Jonathan Marek Reviewed-by: Eric Anholt --- diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 9d466705469..26d460f4143 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -254,37 +254,6 @@ tu_tiling_config_update_pipes(struct tu_tiling_config *tiling, sizeof(uint32_t) * (max_pipe_count - used_pipe_count)); } -static void -tu_tiling_config_update(struct tu_tiling_config *tiling, - const struct tu_device *dev, - const uint32_t *buffer_cpp, - uint32_t buffer_count, - const VkRect2D *render_area) -{ - /* see if there is any real change */ - const bool ra_changed = - render_area && - memcmp(&tiling->render_area, render_area, sizeof(*render_area)); - const bool buf_changed = tiling->buffer_count != buffer_count || - memcmp(tiling->buffer_cpp, buffer_cpp, - sizeof(*buffer_cpp) * buffer_count); - if (!ra_changed && !buf_changed) - return; - - if (ra_changed) - tiling->render_area = *render_area; - - if (buf_changed) { - memcpy(tiling->buffer_cpp, buffer_cpp, - sizeof(*buffer_cpp) * buffer_count); - tiling->buffer_count = buffer_count; - } - - tu_tiling_config_update_tile_layout(tiling, dev); - tu_tiling_config_update_pipe_layout(tiling, dev); - tu_tiling_config_update_pipes(tiling, dev); -} - static void tu_tiling_config_get_tile(const struct tu_tiling_config *tiling, const struct tu_device *dev, @@ -416,10 +385,11 @@ tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview) } static void -tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +tu6_emit_zs(struct tu_cmd_buffer *cmd, + const struct tu_subpass *subpass, + struct tu_cs *cs) { const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_subpass *subpass = cmd->state.subpass; const struct tu_tiling_config *tiling = &cmd->state.tiling_config; const uint32_t a = subpass->depth_stencil_attachment.attachment; @@ -457,7 +427,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip))); tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size)); tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); - tu_cs_emit(cs, tiling->gmem_offsets[subpass->color_count]); + tu_cs_emit(cs, tiling->gmem_offsets[a]); tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); @@ -479,10 +449,11 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } static void -tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +tu6_emit_mrt(struct tu_cmd_buffer *cmd, + const struct tu_subpass *subpass, + struct tu_cs *cs) { const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_subpass *subpass = cmd->state.subpass; const struct tu_tiling_config *tiling = &cmd->state.tiling_config; unsigned char mrt_comp[MAX_RTS] = { 0 }; unsigned srgb_cntl = 0; @@ -513,7 +484,7 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layout.layer_size)); tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); tu_cs_emit( - cs, tiling->gmem_offsets[i]); /* RB_MRT[i].BASE_GMEM */ + cs, tiling->gmem_offsets[a]); /* RB_MRT[i].BASE_GMEM */ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1); tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb) | @@ -552,11 +523,11 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } static void -tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +tu6_emit_msaa(struct tu_cmd_buffer *cmd, + const struct tu_subpass *subpass, + struct tu_cs *cs) { - const struct tu_subpass *subpass = cmd->state.subpass; - const enum a3xx_msaa_samples samples = - tu_msaa_samples(subpass->max_sample_count); + const enum a3xx_msaa_samples samples = tu_msaa_samples(subpass->samples); tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2); tu_cs_emit(cs, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples)); @@ -615,13 +586,21 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, } static void -tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align) { const VkRect2D *render_area = &cmd->state.tiling_config.render_area; - const uint32_t x1 = render_area->offset.x; - const uint32_t y1 = render_area->offset.y; - const uint32_t x2 = x1 + render_area->extent.width - 1; - const uint32_t y2 = y1 + render_area->extent.height - 1; + uint32_t x1 = render_area->offset.x; + uint32_t y1 = render_area->offset.y; + uint32_t x2 = x1 + render_area->extent.width - 1; + uint32_t y2 = y1 + render_area->extent.height - 1; + + /* TODO: alignment requirement seems to be less than tile_align_w/h */ + if (align) { + x1 = x1 & ~cmd->device->physical_device->tile_align_w; + y1 = y1 & ~cmd->device->physical_device->tile_align_h; + x2 = ALIGN_POT(x2 + 1, cmd->device->physical_device->tile_align_w) - 1; + y2 = ALIGN_POT(y2 + 1, cmd->device->physical_device->tile_align_h) - 1; + } tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); tu_cs_emit(cs, @@ -635,10 +614,10 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_image_view *iview, uint32_t gmem_offset, - uint32_t blit_info) + bool resolve) { tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); - tu_cs_emit(cs, blit_info); + tu_cs_emit(cs, resolve ? 0 : (A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM)); const struct tu_native_format *format = tu6_get_native_format(iview->vk_format); @@ -666,39 +645,6 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd, tu_cs_emit(cs, gmem_offset); } -static void -tu6_emit_blit_clear(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t gmem_offset, - const VkClearValue *clear_value) -{ - const struct tu_native_format *format = - tu6_get_native_format(iview->vk_format); - assert(format && format->rb >= 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); - tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - tu_cs_emit(cs, gmem_offset); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); - tu_cs_emit(cs, 0); - - uint32_t clear_vals[4] = { 0 }; - tu_pack_clear_value(clear_value, iview->vk_format, clear_vals); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); - tu_cs_emit(cs, clear_vals[0]); - tu_cs_emit(cs, clear_vals[1]); - tu_cs_emit(cs, clear_vals[2]); - tu_cs_emit(cs, clear_vals[3]); -} - static void tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { @@ -837,69 +783,120 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, } static void -tu6_emit_tile_load_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t a, - uint32_t gmem_index) +tu6_emit_load_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a) { - const struct tu_framebuffer *fb = cmd->state.framebuffer; const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - const struct tu_attachment_state *attachments = cmd->state.attachments; - + const struct tu_framebuffer *fb = cmd->state.framebuffer; const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_attachment_state *att = attachments + a; - if (att->pending_clear_aspects) { - tu6_emit_blit_clear(cmd, cs, iview, - tiling->gmem_offsets[gmem_index], - &att->clear_value); - } else { - tu6_emit_blit_info(cmd, cs, iview, - tiling->gmem_offsets[gmem_index], - A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM); - } + const struct tu_render_pass_attachment *attachment = + &cmd->state.pass->attachments[a]; - tu6_emit_blit(cmd, cs); + if (!attachment->needs_gmem) + return; + + const uint32_t x1 = tiling->render_area.offset.x; + const uint32_t y1 = tiling->render_area.offset.y; + const uint32_t x2 = x1 + tiling->render_area.extent.width; + const uint32_t y2 = y1 + tiling->render_area.extent.height; + const uint32_t tile_x2 = + tiling->tile0.offset.x + tiling->tile0.extent.width * tiling->tile_count.width; + const uint32_t tile_y2 = + tiling->tile0.offset.y + tiling->tile0.extent.height * tiling->tile_count.height; + bool need_load = + x1 != tiling->tile0.offset.x || x2 != MIN2(fb->width, tile_x2) || + y1 != tiling->tile0.offset.y || y2 != MIN2(fb->height, tile_y2); + + if (need_load) + tu_finishme("improve handling of unaligned render area"); + + if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) + need_load = true; + + if (vk_format_has_stencil(iview->vk_format) && + attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) + need_load = true; + + if (need_load) { + tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[a], false); + tu6_emit_blit(cmd, cs); + } } static void -tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs) +tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, + uint32_t a, + const VkRenderPassBeginInfo *info) { - const struct tu_subpass *subpass = cmd->state.subpass; + const struct tu_tiling_config *tiling = &cmd->state.tiling_config; + const struct tu_framebuffer *fb = cmd->state.framebuffer; + const struct tu_image_view *iview = fb->attachments[a].attachment; + const struct tu_render_pass_attachment *attachment = + &cmd->state.pass->attachments[a]; + unsigned clear_mask = 0; + + /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */ + if (!attachment->needs_gmem) + return; - tu6_emit_blit_scissor(cmd, cs); + if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + clear_mask = 0xf; - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a != VK_ATTACHMENT_UNUSED) - tu6_emit_tile_load_attachment(cmd, cs, a, i); + if (vk_format_has_stencil(iview->vk_format)) { + clear_mask &= 0x1; + if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) + clear_mask |= 0x2; } + if (!clear_mask) + return; - const uint32_t a = subpass->depth_stencil_attachment.attachment; - if (a != VK_ATTACHMENT_UNUSED) - tu6_emit_tile_load_attachment(cmd, cs, a, subpass->color_count); + const struct tu_native_format *format = + tu6_get_native_format(iview->vk_format); + assert(format && format->rb >= 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); + tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb)); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); + tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(clear_mask)); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + tu_cs_emit(cs, tiling->gmem_offsets[a]); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); + tu_cs_emit(cs, 0); + + uint32_t clear_vals[4] = { 0 }; + tu_pack_clear_value(&info->pClearValues[a], iview->vk_format, clear_vals); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); + tu_cs_emit(cs, clear_vals[0]); + tu_cs_emit(cs, clear_vals[1]); + tu_cs_emit(cs, clear_vals[2]); + tu_cs_emit(cs, clear_vals[3]); + + tu6_emit_blit(cmd, cs); } static void tu6_emit_store_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a, - uint32_t gmem_index) + uint32_t gmem_a) { - const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - - if (a == VK_ATTACHMENT_UNUSED) + if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE) return; - tu6_emit_blit_info(cmd, cs, fb->attachments[a].attachment, - tiling->gmem_offsets[gmem_index], 0); + tu6_emit_blit_info(cmd, cs, + cmd->state.framebuffer->attachments[a].attachment, + cmd->state.tiling_config.gmem_offsets[gmem_a], true); tu6_emit_blit(cmd, cs); } static void tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { - const struct tu_subpass *subpass = cmd->state.subpass; + const struct tu_render_pass *pass = cmd->state.pass; + const struct tu_subpass *subpass = &pass->subpasses[pass->subpass_count-1]; tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3); tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) | @@ -916,22 +913,21 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); tu6_emit_marker(cmd, cs); - tu6_emit_blit_scissor(cmd, cs); + tu6_emit_blit_scissor(cmd, cs, true); - for (uint32_t i = 0; i < subpass->color_count; ++i) { - tu6_emit_store_attachment(cmd, cs, - subpass->color_attachments[i].attachment, - i); - if (subpass->resolve_attachments) { - tu6_emit_store_attachment(cmd, cs, - subpass->resolve_attachments[i].attachment, - i); - } + for (uint32_t a = 0; a < pass->attachment_count; ++a) { + if (pass->attachments[a].needs_gmem) + tu6_emit_store_attachment(cmd, cs, a, a); } - tu6_emit_store_attachment(cmd, cs, - subpass->depth_stencil_attachment.attachment, - subpass->color_count); + if (subpass->resolve_attachments) { + for (unsigned i = 0; i < subpass->color_count; i++) { + uint32_t a = subpass->resolve_attachments[i].attachment; + if (a != VK_ATTACHMENT_UNUSED) + tu6_emit_store_attachment(cmd, cs, a, + subpass->color_attachments[i].attachment); + } + } } static void @@ -1354,10 +1350,6 @@ tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1); tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */ - tu6_emit_zs(cmd, cs); - tu6_emit_mrt(cmd, cs); - tu6_emit_msaa(cmd, cs); - if (use_hw_binning(cmd)) { tu6_emit_bin_size(cmd, cs, A6XX_RB_BIN_CONTROL_BINNING_PASS | 0x6000000); @@ -1464,11 +1456,13 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd) } static void -tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd) +tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd, + const VkRenderPassBeginInfo *info) { - const uint32_t tile_load_space = 16 + 32 * MAX_RTS; - const struct tu_subpass *subpass = cmd->state.subpass; - struct tu_attachment_state *attachments = cmd->state.attachments; + const uint32_t tile_load_space = + 6 + (23+19) * cmd->state.pass->attachment_count + + 21 + (13 * cmd->state.subpass->color_count + 8) + 11; + struct tu_cs sub_cs; VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs, @@ -1478,22 +1472,27 @@ tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd) return; } - /* emit to tile-load sub_cs */ - tu6_emit_tile_load(cmd, &sub_cs); + tu6_emit_blit_scissor(cmd, &sub_cs, true); - cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs); + for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) + tu6_emit_load_attachment(cmd, &sub_cs, i); - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a != VK_ATTACHMENT_UNUSED) - attachments[a].pending_clear_aspects = 0; - } + tu6_emit_blit_scissor(cmd, &sub_cs, false); + + for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) + tu6_emit_clear_attachment(cmd, &sub_cs, i, info); + + tu6_emit_zs(cmd, cmd->state.subpass, &sub_cs); + tu6_emit_mrt(cmd, cmd->state.subpass, &sub_cs); + tu6_emit_msaa(cmd, cmd->state.subpass, &sub_cs); + + cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs); } static void tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd) { - const uint32_t tile_store_space = 32 + 32 * MAX_RTS; + const uint32_t tile_store_space = 32 + 23 * cmd->state.pass->attachment_count; struct tu_cs sub_cs; VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs, @@ -1515,37 +1514,20 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd, { const struct tu_device *dev = cmd->device; const struct tu_render_pass *pass = cmd->state.pass; - const struct tu_subpass *subpass = cmd->state.subpass; struct tu_tiling_config *tiling = &cmd->state.tiling_config; - uint32_t buffer_cpp[MAX_RTS + 2]; - uint32_t buffer_count = 0; - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) { - buffer_cpp[buffer_count++] = 0; - continue; - } - - const struct tu_render_pass_attachment *att = &pass->attachments[a]; - buffer_cpp[buffer_count++] = - vk_format_get_blocksize(att->format) * att->samples; + tiling->render_area = *render_area; + for (uint32_t a = 0; a < pass->attachment_count; a++) { + if (pass->attachments[a].needs_gmem) + tiling->buffer_cpp[a] = pass->attachments[a].cpp; + else + tiling->buffer_cpp[a] = 0; } + tiling->buffer_count = pass->attachment_count; - if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { - const uint32_t a = subpass->depth_stencil_attachment.attachment; - const struct tu_render_pass_attachment *att = &pass->attachments[a]; - - /* TODO */ - assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT); - - buffer_cpp[buffer_count++] = - vk_format_get_blocksize(att->format) * att->samples; - } - - tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count, - render_area); + tu_tiling_config_update_tile_layout(tiling, dev); + tu_tiling_config_update_pipe_layout(tiling, dev); + tu_tiling_config_update_pipes(tiling, dev); } const struct tu_dynamic_state default_dynamic_state = { @@ -1804,72 +1786,6 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer) return cmd_buffer->record_result; } -static VkResult -tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info) -{ - struct tu_cmd_state *state = &cmd_buffer->state; - const struct tu_framebuffer *fb = state->framebuffer; - const struct tu_render_pass *pass = state->pass; - - for (uint32_t i = 0; i < fb->attachment_count; ++i) { - const struct tu_image_view *iview = fb->attachments[i].attachment; - tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo, - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); - } - - if (pass->attachment_count == 0) { - state->attachments = NULL; - return VK_SUCCESS; - } - - state->attachments = - vk_alloc(&cmd_buffer->pool->alloc, - pass->attachment_count * sizeof(state->attachments[0]), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->attachments == NULL) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; - return cmd_buffer->record_result; - } - - for (uint32_t i = 0; i < pass->attachment_count; ++i) { - const struct tu_render_pass_attachment *att = &pass->attachments[i]; - VkImageAspectFlags att_aspects = vk_format_aspects(att->format); - VkImageAspectFlags clear_aspects = 0; - - if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { - /* color attachment */ - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } else { - /* depthstencil attachment */ - if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - } - - state->attachments[i].pending_clear_aspects = clear_aspects; - state->attachments[i].cleared_views = 0; - if (clear_aspects && info) { - assert(info->clearValueCount > i); - state->attachments[i].clear_value = info->pClearValues[i]; - } - - state->attachments[i].current_layout = att->initial_layout; - } - - return VK_SUCCESS; -} - VkResult tu_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo, @@ -2130,8 +2046,6 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer) tu_cs_end(&cmd_buffer->cs); tu_cs_end(&cmd_buffer->draw_cs); - assert(!cmd_buffer->state.attachments); - cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE; return cmd_buffer->record_result; @@ -2417,26 +2331,28 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo *pRenderPassBegin, VkSubpassContents contents) { - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass); - TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + TU_FROM_HANDLE(tu_framebuffer, fb, pRenderPassBegin->framebuffer); VkResult result; - cmd_buffer->state.pass = pass; - cmd_buffer->state.subpass = pass->subpasses; - cmd_buffer->state.framebuffer = framebuffer; - - result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - if (result != VK_SUCCESS) - return; + cmd->state.pass = pass; + cmd->state.subpass = pass->subpasses; + cmd->state.framebuffer = fb; - tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea); - tu_cmd_prepare_tile_load_ib(cmd_buffer); - tu_cmd_prepare_tile_store_ib(cmd_buffer); + tu_cmd_update_tiling_config(cmd, &pRenderPassBegin->renderArea); + tu_cmd_prepare_tile_load_ib(cmd, pRenderPassBegin); + tu_cmd_prepare_tile_store_ib(cmd); /* note: use_hw_binning only checks tiling config */ - if (use_hw_binning(cmd_buffer)) - cmd_buffer->use_vsc_data = true; + if (use_hw_binning(cmd)) + cmd->use_vsc_data = true; + + for (uint32_t i = 0; i < fb->attachment_count; ++i) { + const struct tu_image_view *iview = fb->attachments[i].attachment; + tu_bo_list_add(&cmd->bo_list, iview->image->bo, + MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); + } } void @@ -2452,14 +2368,53 @@ void tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); + const struct tu_render_pass *pass = cmd->state.pass; + const struct tu_tiling_config *tiling = &cmd->state.tiling_config; + struct tu_cs *cs = &cmd->draw_cs; - tu_cmd_render_tiles(cmd); + VkResult result = tu_cs_reserve_space(cmd->device, cs, 1024); + if (result != VK_SUCCESS) { + cmd->record_result = result; + return; + } - cmd->state.subpass++; + const struct tu_subpass *subpass = cmd->state.subpass++; + /* TODO: + * if msaa samples change between subpasses, + * attachment store is broken for some attachments + */ + if (subpass->resolve_attachments) { + tu6_emit_blit_scissor(cmd, cs, true); + for (unsigned i = 0; i < subpass->color_count; i++) { + uint32_t a = subpass->resolve_attachments[i].attachment; + if (a != VK_ATTACHMENT_UNUSED) { + tu6_emit_store_attachment(cmd, cs, a, + subpass->color_attachments[i].attachment); + } + } + } - tu_cmd_update_tiling_config(cmd, NULL); - tu_cmd_prepare_tile_load_ib(cmd); - tu_cmd_prepare_tile_store_ib(cmd); + /* emit mrt/zs/msaa state for the subpass that is starting */ + tu6_emit_zs(cmd, cmd->state.subpass, cs); + tu6_emit_mrt(cmd, cmd->state.subpass, cs); + tu6_emit_msaa(cmd, cmd->state.subpass, cs); + + /* TODO: + * since we don't know how to do GMEM->GMEM resolve, + * resolve attachments are resolved to memory then loaded to GMEM again if needed + */ + if (subpass->resolve_attachments) { + for (unsigned i = 0; i < subpass->color_count; i++) { + uint32_t a = subpass->resolve_attachments[i].attachment; + const struct tu_image_view *iview = + cmd->state.framebuffer->attachments[a].attachment; + if (a != VK_ATTACHMENT_UNUSED && pass->attachments[a].needs_gmem) { + tu_finishme("missing GMEM->GMEM resolve, performance will suffer\n"); + tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[a], false); + tu6_emit_blit(cmd, cs); + } + } + } } void @@ -3651,9 +3606,6 @@ tu_CmdEndRenderPass(VkCommandBuffer commandBuffer) tu_cs_discard_entries(&cmd_buffer->draw_cs); tu_cs_begin(&cmd_buffer->draw_cs); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - cmd_buffer->state.attachments = NULL; - cmd_buffer->state.pass = NULL; cmd_buffer->state.subpass = NULL; cmd_buffer->state.framebuffer = NULL; diff --git a/src/freedreno/vulkan/tu_meta_clear.c b/src/freedreno/vulkan/tu_meta_clear.c index 48754e46601..c539a1b65c2 100644 --- a/src/freedreno/vulkan/tu_meta_clear.c +++ b/src/freedreno/vulkan/tu_meta_clear.c @@ -126,14 +126,11 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer, tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); for (unsigned j = 0; j < attachmentCount; j++) { - uint32_t index, a; - if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - index = pAttachments[j].colorAttachment; - a = subpass->color_attachments[index].attachment; - } else { - index = subpass->color_count; + uint32_t a; + if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) + a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment; + else a = subpass->depth_stencil_attachment.attachment; - } /* TODO: partial depth/stencil clear? */ VkFormat fmt = cmd->state.pass->attachments[a].format; @@ -147,7 +144,7 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer, tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - tu_cs_emit(cs, cmd->state.tiling_config.gmem_offsets[index]); + tu_cs_emit(cs, cmd->state.tiling_config.gmem_offsets[a]); tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); tu_cs_emit(cs, 0); diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c index 21ae90361a9..dcb3ab06e06 100644 --- a/src/freedreno/vulkan/tu_pass.c +++ b/src/freedreno/vulkan/tu_pass.c @@ -27,6 +27,14 @@ #include "tu_private.h" #include "vk_util.h" +#include "vk_format.h" + +static void update_samples(struct tu_subpass *subpass, + VkSampleCountFlagBits samples) +{ + assert(subpass->samples == 0 || subpass->samples == samples); + subpass->samples = samples; +} VkResult tu_CreateRenderPass(VkDevice _device, @@ -38,9 +46,9 @@ tu_CreateRenderPass(VkDevice _device, struct tu_render_pass *pass; size_t size; size_t attachments_offset; - VkRenderPassMultiviewCreateInfo *multiview_info = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + assert(pCreateInfo->attachmentCount < MAX_ATTACHMENTS); size = sizeof(*pass); size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); @@ -57,29 +65,20 @@ tu_CreateRenderPass(VkDevice _device, pass->subpass_count = pCreateInfo->subpassCount; pass->attachments = (void *) pass + attachments_offset; - vk_foreach_struct(ext, pCreateInfo->pNext) - { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: - multiview_info = (VkRenderPassMultiviewCreateInfo *) ext; - break; - default: - break; - } - } - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; att->format = pCreateInfo->pAttachments[i].format; - att->samples = pCreateInfo->pAttachments[i].samples; + att->cpp = vk_format_get_blocksize(att->format) * + pCreateInfo->pAttachments[i].samples; att->load_op = pCreateInfo->pAttachments[i].loadOp; att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; - att->final_layout = pCreateInfo->pAttachments[i].finalLayout; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + att->store_op = pCreateInfo->pAttachments[i].storeOp; + if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE && + vk_format_has_stencil(att->format)) + att->store_op = VK_ATTACHMENT_STORE_OP_STORE; } + uint32_t subpass_attachment_count = 0; struct tu_subpass_attachment *p; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { @@ -87,8 +86,7 @@ tu_CreateRenderPass(VkDevice _device, subpass_attachment_count += desc->inputAttachmentCount + desc->colorAttachmentCount + - (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + - (desc->pDepthStencilAttachment != NULL); + (desc->pResolveAttachments ? desc->colorAttachmentCount : 0); } if (subpass_attachment_count) { @@ -106,26 +104,21 @@ tu_CreateRenderPass(VkDevice _device, p = pass->subpass_attachments; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - uint32_t color_sample_count = 1, depth_sample_count = 1; struct tu_subpass *subpass = &pass->subpasses[i]; subpass->input_count = desc->inputAttachmentCount; subpass->color_count = desc->colorAttachmentCount; - if (multiview_info) - subpass->view_mask = multiview_info->pViewMasks[i]; + subpass->samples = 0; if (desc->inputAttachmentCount > 0) { subpass->input_attachments = p; p += desc->inputAttachmentCount; for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pInputAttachments[j].attachment, - .layout = desc->pInputAttachments[j].layout, - }; - if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) - pass->attachments[desc->pInputAttachments[j].attachment] - .view_mask |= subpass->view_mask; + uint32_t a = desc->pInputAttachments[j].attachment; + subpass->input_attachments[j].attachment = a; + if (a != VK_ATTACHMENT_UNUSED) + pass->attachments[a].needs_gmem = true; } } @@ -134,18 +127,12 @@ tu_CreateRenderPass(VkDevice _device, p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pColorAttachments[j].attachment, - .layout = desc->pColorAttachments[j].layout, - }; - if (desc->pColorAttachments[j].attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pColorAttachments[j].attachment] - .view_mask |= subpass->view_mask; - color_sample_count = - pCreateInfo - ->pAttachments[desc->pColorAttachments[j].attachment] - .samples; + uint32_t a = desc->pColorAttachments[j].attachment; + subpass->color_attachments[j].attachment = a; + + if (a != VK_ATTACHMENT_UNUSED) { + pass->attachments[a].needs_gmem = true; + update_samples(subpass, pCreateInfo->pAttachments[a].samples); } } } @@ -153,59 +140,21 @@ tu_CreateRenderPass(VkDevice _device, subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL; if (desc->pResolveAttachments) { p += desc->colorAttachmentCount; - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pResolveAttachments[j].attachment, - .layout = desc->pResolveAttachments[j].layout, - }; - if (a != VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pResolveAttachments[j].attachment] - .view_mask |= subpass->view_mask; - } + subpass->resolve_attachments[j].attachment = + desc->pResolveAttachments[j].attachment; } } - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = (struct tu_subpass_attachment) { - .attachment = desc->pDepthStencilAttachment->attachment, - .layout = desc->pDepthStencilAttachment->layout, - }; - if (desc->pDepthStencilAttachment->attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pDepthStencilAttachment->attachment] - .view_mask |= subpass->view_mask; - depth_sample_count = - pCreateInfo - ->pAttachments[desc->pDepthStencilAttachment->attachment] - .samples; - } - } else { - subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; + uint32_t a = desc->pDepthStencilAttachment ? + desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED; + subpass->depth_stencil_attachment.attachment = a; + if (a != VK_ATTACHMENT_UNUSED) { + pass->attachments[a].needs_gmem = true; + update_samples(subpass, pCreateInfo->pAttachments[a].samples); } - subpass->max_sample_count = - MAX2(color_sample_count, depth_sample_count); - } - - for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { - uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; - if (dst == VK_SUBPASS_EXTERNAL) { - pass->end_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->end_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->end_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } else { - pass->subpasses[dst].start_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->subpasses[dst].start_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->subpasses[dst].start_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } + subpass->samples = subpass->samples ?: 1; } *pRenderPass = tu_render_pass_to_handle(pass); @@ -224,8 +173,8 @@ tu_CreateRenderPass2KHR(VkDevice _device, size_t size; size_t attachments_offset; - assert(pCreateInfo->sType == - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); + assert(pCreateInfo->attachmentCount < MAX_ATTACHMENTS); size = sizeof(*pass); size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); @@ -246,13 +195,15 @@ tu_CreateRenderPass2KHR(VkDevice _device, struct tu_render_pass_attachment *att = &pass->attachments[i]; att->format = pCreateInfo->pAttachments[i].format; - att->samples = pCreateInfo->pAttachments[i].samples; + att->cpp = vk_format_get_blocksize(att->format) * + pCreateInfo->pAttachments[i].samples; att->load_op = pCreateInfo->pAttachments[i].loadOp; att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; - att->final_layout = pCreateInfo->pAttachments[i].finalLayout; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + att->store_op = pCreateInfo->pAttachments[i].storeOp; + att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + if (pCreateInfo->pAttachments[i].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE && + vk_format_has_stencil(att->format)) + att->store_op = VK_ATTACHMENT_STORE_OP_STORE; } uint32_t subpass_attachment_count = 0; struct tu_subpass_attachment *p; @@ -261,8 +212,7 @@ tu_CreateRenderPass2KHR(VkDevice _device, subpass_attachment_count += desc->inputAttachmentCount + desc->colorAttachmentCount + - (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + - (desc->pDepthStencilAttachment != NULL); + (desc->pResolveAttachments ? desc->colorAttachmentCount : 0); } if (subpass_attachment_count) { @@ -280,25 +230,21 @@ tu_CreateRenderPass2KHR(VkDevice _device, p = pass->subpass_attachments; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; - uint32_t color_sample_count = 1, depth_sample_count = 1; struct tu_subpass *subpass = &pass->subpasses[i]; subpass->input_count = desc->inputAttachmentCount; subpass->color_count = desc->colorAttachmentCount; - subpass->view_mask = desc->viewMask; + subpass->samples = 0; if (desc->inputAttachmentCount > 0) { subpass->input_attachments = p; p += desc->inputAttachmentCount; for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pInputAttachments[j].attachment, - .layout = desc->pInputAttachments[j].layout, - }; - if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) - pass->attachments[desc->pInputAttachments[j].attachment] - .view_mask |= subpass->view_mask; + uint32_t a = desc->pInputAttachments[j].attachment; + subpass->input_attachments[j].attachment = a; + if (a != VK_ATTACHMENT_UNUSED) + pass->attachments[a].needs_gmem = true; } } @@ -307,18 +253,12 @@ tu_CreateRenderPass2KHR(VkDevice _device, p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pColorAttachments[j].attachment, - .layout = desc->pColorAttachments[j].layout, - }; - if (desc->pColorAttachments[j].attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pColorAttachments[j].attachment] - .view_mask |= subpass->view_mask; - color_sample_count = - pCreateInfo - ->pAttachments[desc->pColorAttachments[j].attachment] - .samples; + uint32_t a = desc->pColorAttachments[j].attachment; + subpass->color_attachments[j].attachment = a; + + if (a != VK_ATTACHMENT_UNUSED) { + pass->attachments[a].needs_gmem = true; + update_samples(subpass, pCreateInfo->pAttachments[a].samples); } } } @@ -326,59 +266,22 @@ tu_CreateRenderPass2KHR(VkDevice _device, subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL; if (desc->pResolveAttachments) { p += desc->colorAttachmentCount; - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pResolveAttachments[j].attachment, - .layout = desc->pResolveAttachments[j].layout, - }; - if (a != VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pResolveAttachments[j].attachment] - .view_mask |= subpass->view_mask; - } - } - } - - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = (struct tu_subpass_attachment) { - .attachment = desc->pDepthStencilAttachment->attachment, - .layout = desc->pDepthStencilAttachment->layout, - }; - if (desc->pDepthStencilAttachment->attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pDepthStencilAttachment->attachment] - .view_mask |= subpass->view_mask; - depth_sample_count = - pCreateInfo - ->pAttachments[desc->pDepthStencilAttachment->attachment] - .samples; + subpass->resolve_attachments[j].attachment = + desc->pResolveAttachments[j].attachment; } - } else { - subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; } - subpass->max_sample_count = - MAX2(color_sample_count, depth_sample_count); - } - for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { - uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; - if (dst == VK_SUBPASS_EXTERNAL) { - pass->end_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->end_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->end_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } else { - pass->subpasses[dst].start_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->subpasses[dst].start_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->subpasses[dst].start_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; + uint32_t a = desc->pDepthStencilAttachment ? + desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED; + subpass->depth_stencil_attachment.attachment = a; + if (a != VK_ATTACHMENT_UNUSED) { + pass->attachments[a].needs_gmem = true; + update_samples(subpass, pCreateInfo->pAttachments[a].samples); } + + subpass->samples = subpass->samples ?: 1; } *pRenderPass = tu_render_pass_to_handle(pass); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 2d9051284e1..e3cc32fdf7c 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -96,6 +96,10 @@ typedef uint32_t xcb_window_t; #define MAX_VIEWS 8 /* The Qualcomm driver exposes 0x20000058 */ #define MAX_STORAGE_BUFFER_RANGE 0x20000000 +/* TODO: this isn't a hardware limit, but for a high # of attachments + * we are missing logic to avoid having them all in GMEM at the same time + */ +#define MAX_ATTACHMENTS 64 #define NUM_DEPTH_CLEAR_PIPELINES 3 @@ -775,19 +779,6 @@ tu_get_debug_option_name(int id); const char * tu_get_perftest_option_name(int id); -/** - * Attachment state when recording a renderpass instance. - * - * The clear value is valid only if there exists a pending clear. - */ -struct tu_attachment_state -{ - VkImageAspectFlags pending_clear_aspects; - uint32_t cleared_views; - VkClearValue clear_value; - VkImageLayout current_layout; -}; - struct tu_descriptor_state { struct tu_descriptor_set *sets[MAX_SETS]; @@ -809,7 +800,7 @@ struct tu_tile struct tu_tiling_config { VkRect2D render_area; - uint32_t buffer_cpp[MAX_RTS + 2]; + uint32_t buffer_cpp[MAX_ATTACHMENTS]; uint32_t buffer_count; /* position and size of the first tile */ @@ -817,7 +808,7 @@ struct tu_tiling_config /* number of tiles */ VkExtent2D tile_count; - uint32_t gmem_offsets[MAX_RTS + 2]; + uint32_t gmem_offsets[MAX_ATTACHMENTS]; /* size of the first VSC pipe */ VkExtent2D pipe0; @@ -869,7 +860,6 @@ struct tu_cmd_state const struct tu_render_pass *pass; const struct tu_subpass *subpass; const struct tu_framebuffer *framebuffer; - struct tu_attachment_state *attachments; struct tu_tiling_config tiling_config; @@ -1483,21 +1473,9 @@ struct tu_framebuffer struct tu_attachment_info attachments[0]; }; -struct tu_subpass_barrier -{ - VkPipelineStageFlags src_stage_mask; - VkAccessFlags src_access_mask; - VkAccessFlags dst_access_mask; -}; - -void -tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer, - const struct tu_subpass_barrier *barrier); - struct tu_subpass_attachment { uint32_t attachment; - VkImageLayout layout; }; struct tu_subpass @@ -1509,21 +1487,18 @@ struct tu_subpass struct tu_subpass_attachment *resolve_attachments; struct tu_subpass_attachment depth_stencil_attachment; - struct tu_subpass_barrier start_barrier; - - uint32_t view_mask; - VkSampleCountFlagBits max_sample_count; + VkSampleCountFlagBits samples; }; struct tu_render_pass_attachment { VkFormat format; - uint32_t samples; + uint32_t cpp; VkAttachmentLoadOp load_op; VkAttachmentLoadOp stencil_load_op; - VkImageLayout initial_layout; - VkImageLayout final_layout; - uint32_t view_mask; + VkAttachmentStoreOp store_op; + VkAttachmentStoreOp stencil_store_op; + bool needs_gmem; }; struct tu_render_pass @@ -1532,7 +1507,6 @@ struct tu_render_pass uint32_t subpass_count; struct tu_subpass_attachment *subpass_attachments; struct tu_render_pass_attachment *attachments; - struct tu_subpass_barrier end_barrier; struct tu_subpass subpasses[0]; };