X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_cmd_buffer.c;h=20ba975052bc6c35450d103387ccdcbbf3033c27;hb=1b8d99e2885456dcd2d9309f6e1bd7f60d30ed75;hp=f911af866bec6946371a789ab70008b8da09c4a5;hpb=46b7512b0a73b24a00fa9308a44ab4ffe6054874;p=mesa.git

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f911af866be..20ba975052b 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -332,8 +332,10 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 	}
 
 	cmd_buffer->push_constant_stages = 0;
-	cmd_buffer->scratch_size_needed = 0;
-	cmd_buffer->compute_scratch_size_needed = 0;
+	cmd_buffer->scratch_size_per_wave_needed = 0;
+	cmd_buffer->scratch_waves_wanted = 0;
+	cmd_buffer->compute_scratch_size_per_wave_needed = 0;
+	cmd_buffer->compute_scratch_waves_wanted = 0;
 	cmd_buffer->esgs_ring_size_needed = 0;
 	cmd_buffer->gsvs_ring_size_needed = 0;
 	cmd_buffer->tess_rings_needed = false;
@@ -556,8 +558,9 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
 		assert(!"invalid ring type");
 	}
 
-	data[0] = (uintptr_t)pipeline;
-	data[1] = (uintptr_t)pipeline >> 32;
+	uint64_t pipeline_address = (uintptr_t)pipeline;
+	data[0] = pipeline_address;
+	data[1] = pipeline_address >> 32;
 
 	radv_emit_write_data_packet(cmd_buffer, va, 2, data);
 }
@@ -862,7 +865,6 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_pipeline *pipeline)
 {
 	int num_samples = pipeline->graphics.ms.num_samples;
-	struct radv_multisample_state *ms = &pipeline->graphics.ms;
 	struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
 
 	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions)
@@ -871,20 +873,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 	if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
 		return;
 
-	radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
-	radeon_emit(cmd_buffer->cs, ms->pa_sc_line_cntl);
-	radeon_emit(cmd_buffer->cs, ms->pa_sc_aa_config);
-
-	radeon_set_context_reg(cmd_buffer->cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0);
-
 	radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
 
-	/* GFX9: Flush DFSM when the AA mode changes. */
-	if (cmd_buffer->device->dfsm_allowed) {
-		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
-	}
-
 	cmd_buffer->state.context_roll_without_scissor_emitted = true;
 }
 
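The radv_save_pipeline hunk above is more than a cleanup: on a 32-bit host, uintptr_t may be only 32 bits wide, and shifting a 32-bit value right by 32 is undefined behavior in C, so widening to uint64_t before the shift makes both dwords well defined. A minimal standalone sketch of the lo/hi split (the local pointer and the printf are illustrative only, not radv code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int object;
	void *pipeline = &object; /* stand-in for the pipeline pointer */

	/* Widen before shifting: uintptr_t may be only 32 bits wide,
	 * and shifting a 32-bit value by 32 is undefined behavior. */
	uint64_t pipeline_address = (uintptr_t)pipeline;
	uint32_t data[2];
	data[0] = (uint32_t)pipeline_address;         /* low dword  */
	data[1] = (uint32_t)(pipeline_address >> 32); /* high dword */

	printf("lo=0x%08x hi=0x%08x\n", data[0], data[1]);
	return 0;
}
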
@@ -1000,6 +990,9 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
 	unsigned sx_blend_opt_epsilon = 0;
 	unsigned sx_blend_opt_control = 0;
 
+	if (!cmd_buffer->state.attachments || !subpass)
+		return;
+
 	for (unsigned i = 0; i < subpass->color_count; ++i) {
 		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
 			sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
@@ -1132,6 +1125,33 @@
 	cmd_buffer->state.context_roll_without_scissor_emitted = true;
 }
 
+static void
+radv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer)
+{
+	if (!cmd_buffer->device->pbb_allowed)
+		return;
+
+	struct radv_binning_settings settings =
+		radv_get_binning_settings(cmd_buffer->device->physical_device);
+	bool break_for_new_ps =
+		(!cmd_buffer->state.emitted_pipeline ||
+		 cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
+		 cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
+		(settings.context_states_per_bin > 1 ||
+		 settings.persistent_states_per_bin > 1);
+	bool break_for_new_cb_target_mask =
+		(!cmd_buffer->state.emitted_pipeline ||
+		 cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
+		 cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
+		settings.context_states_per_bin > 1;
+
+	if (!break_for_new_ps && !break_for_new_cb_target_mask)
+		return;
+
+	radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+	radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+}
+
 static void
 radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -1143,9 +1163,10 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 	radv_update_multisample_state(cmd_buffer, pipeline);
 	radv_update_binning_state(cmd_buffer, pipeline);
 
-	cmd_buffer->scratch_size_needed =
-	        MAX2(cmd_buffer->scratch_size_needed,
-	             pipeline->max_waves * pipeline->scratch_bytes_per_wave);
+	cmd_buffer->scratch_size_per_wave_needed = MAX2(cmd_buffer->scratch_size_per_wave_needed,
+	                                                pipeline->scratch_bytes_per_wave);
+	cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted,
+	                                        pipeline->max_waves);
 
 	if (!cmd_buffer->state.emitted_pipeline ||
 	    cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
@@ -1163,6 +1184,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 		cmd_buffer->state.context_roll_without_scissor_emitted = true;
 	}
 
+	radv_emit_batch_break_on_new_ps(cmd_buffer);
+
 	for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
 		if (!pipeline->shaders[i])
 			continue;
@@ -1552,9 +1575,19 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
 	if (cmd_buffer->state.attachments[att_idx].iview->image != image)
 		return;
 
-	radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
-	radeon_emit(cs, ds_clear_value.stencil);
-	radeon_emit(cs, fui(ds_clear_value.depth));
+	if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
+			VK_IMAGE_ASPECT_STENCIL_BIT)) {
+		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
+		radeon_emit(cs, ds_clear_value.stencil);
+		radeon_emit(cs, fui(ds_clear_value.depth));
+	} else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+		radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1);
+		radeon_emit(cs, fui(ds_clear_value.depth));
+	} else {
+		assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1);
+		radeon_emit(cs, ds_clear_value.stencil);
+	}
 
 	/* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
 	 * only needed when clearing Z to 0.0.
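The scratch hunks above replace one premultiplied byte count with two quantities, a per-wave size and a wanted wave count, so the submission side can scale the number of waves independently of the per-wave slice when sizing the scratch buffer. A hedged sketch of how the two values would combine at allocation time; scratch_bo_size and hw_max_waves are illustrative stand-ins, not radv functions:

#include <stdint.h>

/* Illustrative only: combine the per-wave scratch size with the
 * wanted wave count, clamped to a hardware limit. Granting fewer
 * waves only costs parallelism; every wave that does run still gets
 * its full per-wave slice, so no wave can run out of scratch. */
static uint64_t
scratch_bo_size(uint32_t size_per_wave, uint32_t waves_wanted,
		uint32_t hw_max_waves)
{
	uint32_t waves = waves_wanted < hw_max_waves ? waves_wanted
						     : hw_max_waves;

	return (uint64_t)size_per_wave * waves;
}
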
@@ -2019,7 +2052,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 			       S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
 	}
 
-	if (cmd_buffer->device->pbb_allowed) {
+	if (cmd_buffer->device->dfsm_allowed) {
 		radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
 	}
@@ -2312,14 +2345,15 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 		return;
 
 	radv_foreach_stage(stage, stages) {
-		if (!pipeline->shaders[stage])
+		shader = radv_get_shader(pipeline, stage);
+		if (!shader)
 			continue;
 
-		need_push_constants |= pipeline->shaders[stage]->info.loads_push_constants;
-		need_push_constants |= pipeline->shaders[stage]->info.loads_dynamic_offsets;
+		need_push_constants |= shader->info.loads_push_constants;
+		need_push_constants |= shader->info.loads_dynamic_offsets;
 
-		uint8_t base = pipeline->shaders[stage]->info.base_inline_push_consts;
-		uint8_t count = pipeline->shaders[stage]->info.num_inline_push_consts;
+		uint8_t base = shader->info.base_inline_push_consts;
+		uint8_t count = shader->info.num_inline_push_consts;
 
 		radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
 					     AC_UD_INLINE_PUSH_CONSTANTS,
@@ -2371,7 +2405,6 @@
 	     (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
 	    cmd_buffer->state.pipeline->num_vertex_bindings &&
 	    radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) {
-		struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
 		unsigned vb_offset;
 		void *vb_ptr;
 		uint32_t i = 0;
@@ -2388,6 +2421,7 @@
 			uint32_t offset;
 			struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
 			uint32_t stride = cmd_buffer->state.pipeline->binding_stride[i];
+			unsigned num_records;
 
 			if (!buffer)
 				continue;
@@ -2396,20 +2430,28 @@
 
 			offset = cmd_buffer->vertex_bindings[i].offset;
 			va += offset + buffer->offset;
+
+			num_records = buffer->size - offset;
+			if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride)
+				num_records /= stride;
+
 			desc[0] = va;
 			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
-			if (cmd_buffer->device->physical_device->rad_info.chip_class <= GFX7 && stride)
-				desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
-			else
-				desc[2] = buffer->size - offset;
+			desc[2] = num_records;
 			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
 				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
 			if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+				/* OOB_SELECT chooses the out-of-bounds check:
+				 * - 1: index >= NUM_RECORDS (Structured)
+				 * - 3: offset >= NUM_RECORDS (Raw)
+				 */
+				int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
+
 				desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) |
-					   S_008F0C_OOB_SELECT(1) |
+					   S_008F0C_OOB_SELECT(oob_select) |
 					   S_008F0C_RESOURCE_LEVEL(1);
 			} else {
 				desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
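The OOB_SELECT comment above is the key to the num_records change: with the structured check (used whenever a stride is set) the hardware compares a record index against NUM_RECORDS, so NUM_RECORDS must be a record count, i.e. bytes divided by stride; with the raw check it compares a byte offset, so NUM_RECORDS stays in bytes. A standalone sketch of the two checks exactly as the comment describes them (oob_structured and oob_raw are illustrative helpers, not hardware or radv names):

#include <stdbool.h>
#include <stdint.h>

/* Structured check (OOB_SELECT = 1): the fetch is out of bounds when
 * the record index passes NUM_RECORDS; NUM_RECORDS counts records. */
static bool oob_structured(uint32_t index, uint32_t num_records)
{
	return index >= num_records;
}

/* Raw check (OOB_SELECT = 3): the fetch is out of bounds when the
 * byte offset passes NUM_RECORDS; NUM_RECORDS counts bytes. */
static bool oob_raw(uint32_t byte_offset, uint32_t num_records)
{
	return byte_offset >= num_records;
}
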
@@ -2515,7 +2557,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
 
 			if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
 				desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
-					   S_008F0C_OOB_SELECT(3) |
+					   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
 					   S_008F0C_RESOURCE_LEVEL(1);
 			} else {
 				desc[3] |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
@@ -2844,6 +2886,11 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
 			break;
 		case VK_ACCESS_SHADER_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+			/* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
+			 * invalidate the scalar cache. */
+			if (cmd_buffer->device->physical_device->use_aco &&
+			    cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8)
+				flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
 
 			if (!image_is_coherent)
 				flush_bits |= RADV_CMD_FLAG_INV_L2;
@@ -2939,7 +2986,7 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buf
 	struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview;
 	struct radv_sample_locations_state *sample_locs;
 	VkImageSubresourceRange range;
-	range.aspectMask = 0;
+	range.aspectMask = view->aspect_mask;
 	range.baseMipLevel = view->base_mip;
 	range.levelCount = 1;
 	range.baseArrayLayer = view->base_layer;
@@ -2962,14 +3009,48 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buf
 	sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx,
 							   begin_subpass);
 
-	radv_handle_image_transition(cmd_buffer,
-				     view->image,
-				     cmd_buffer->state.attachments[idx].current_layout,
-				     cmd_buffer->state.attachments[idx].current_in_render_loop,
-				     att.layout, att.in_render_loop,
-				     0, 0, &range, sample_locs);
+	/* Determine if the subpass uses separate depth/stencil layouts. */
+	bool uses_separate_depth_stencil_layouts = false;
+	if ((cmd_buffer->state.attachments[idx].current_layout !=
+	     cmd_buffer->state.attachments[idx].current_stencil_layout) ||
+	    (att.layout != att.stencil_layout)) {
+		uses_separate_depth_stencil_layouts = true;
+	}
+
+	/* For separate layouts, perform depth and stencil transitions
+	 * separately.
+	 */
+	if (uses_separate_depth_stencil_layouts &&
+	    (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT |
+				  VK_IMAGE_ASPECT_STENCIL_BIT))) {
+		/* Depth-only transitions. */
+		range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+		radv_handle_image_transition(cmd_buffer,
+					     view->image,
+					     cmd_buffer->state.attachments[idx].current_layout,
+					     cmd_buffer->state.attachments[idx].current_in_render_loop,
+					     att.layout, att.in_render_loop,
+					     0, 0, &range, sample_locs);
+
+		/* Stencil-only transitions. */
+		range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+		radv_handle_image_transition(cmd_buffer,
+					     view->image,
+					     cmd_buffer->state.attachments[idx].current_stencil_layout,
+					     cmd_buffer->state.attachments[idx].current_in_render_loop,
+					     att.stencil_layout, att.in_render_loop,
+					     0, 0, &range, sample_locs);
+	} else {
+		radv_handle_image_transition(cmd_buffer,
+					     view->image,
+					     cmd_buffer->state.attachments[idx].current_layout,
+					     cmd_buffer->state.attachments[idx].current_in_render_loop,
+					     att.layout, att.in_render_loop,
+					     0, 0, &range, sample_locs);
+	}
 
 	cmd_buffer->state.attachments[idx].current_layout = att.layout;
+	cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
 	cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
 }
 
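The transition split above exists because VK_KHR_separate_depth_stencil_layouts, core in Vulkan 1.2, lets an application keep the depth and stencil aspects of one attachment in different layouts; the per-aspect stencil layouts arrive through a struct chained onto the attachment description. A sketch of how an application would opt in on one attachment (the layout values are chosen for illustration; the remaining VkAttachmentDescription2 members are omitted for brevity):

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Illustrative setup: depth stays attachment-optimal while stencil is
 * kept read-only, via the chained stencil-layout struct. */
static void
fill_depth_stencil_attachment(VkAttachmentDescription2 *att,
			      VkAttachmentDescriptionStencilLayout *stencil)
{
	stencil->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT;
	stencil->pNext = NULL;
	stencil->stencilInitialLayout = VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL;
	stencil->stencilFinalLayout = VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL;

	att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
	att->pNext = stencil;
	/* With the chained struct, these two describe the depth aspect only. */
	att->initialLayout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
	att->finalLayout = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
}
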
@@ -3071,11 +3152,11 @@
 radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
 				 const VkRenderPassBeginInfo *info)
 {
 	struct radv_cmd_state *state = &cmd_buffer->state;
-	const struct VkRenderPassAttachmentBeginInfoKHR *attachment_info = NULL;
+	const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
 
 	if (info) {
 		attachment_info = vk_find_struct_const(info->pNext,
-						       RENDER_PASS_ATTACHMENT_BEGIN_INFO_KHR);
+						       RENDER_PASS_ATTACHMENT_BEGIN_INFO);
 	}
 
@@ -3126,6 +3207,7 @@ radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
 		}
 
 		state->attachments[i].current_layout = att->initial_layout;
+		state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
 		state->attachments[i].sample_location.count = 0;
 
 		struct radv_image_view *iview;
@@ -3159,7 +3241,7 @@ VkResult radv_AllocateCommandBuffers(
 
 	for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
 
-		if (!list_empty(&pool->free_cmd_buffers)) {
+		if (!list_is_empty(&pool->free_cmd_buffers)) {
 			struct radv_cmd_buffer *cmd_buffer =
 				list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
 			list_del(&cmd_buffer->pool_link);
@@ -3449,7 +3531,7 @@ void radv_CmdBindDescriptorSets(
 
 			if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
 				dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
-					  S_008F0C_OOB_SELECT(3) |
+					  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
 					  S_008F0C_RESOURCE_LEVEL(1);
 			} else {
 				dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
@@ -3657,9 +3739,10 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
 	radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
 
-	cmd_buffer->compute_scratch_size_needed =
-	        MAX2(cmd_buffer->compute_scratch_size_needed,
-	             pipeline->max_waves * pipeline->scratch_bytes_per_wave);
+	cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed,
+	                                                        pipeline->scratch_bytes_per_wave);
+	cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted,
+	                                                pipeline->max_waves);
 
 	radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
 			   pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
@@ -3988,10 +4071,14 @@ void radv_CmdExecuteCommands(
 	for (uint32_t i = 0; i < commandBufferCount; i++) {
 		RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
 
-		primary->scratch_size_needed = MAX2(primary->scratch_size_needed,
-		                                    secondary->scratch_size_needed);
-		primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed,
-		                                            secondary->compute_scratch_size_needed);
+		primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed,
+		                                             secondary->scratch_size_per_wave_needed);
+		primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted,
+		                                     secondary->scratch_waves_wanted);
+		primary->compute_scratch_size_per_wave_needed = MAX2(primary->compute_scratch_size_per_wave_needed,
+		                                                     secondary->compute_scratch_size_per_wave_needed);
+		primary->compute_scratch_waves_wanted = MAX2(primary->compute_scratch_waves_wanted,
+		                                             secondary->compute_scratch_waves_wanted);
 
 		if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
 			primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
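When vkCmdExecuteCommands folds a secondary command buffer into a primary, the primary must end up with requirements at least as large as any secondary's; merging each field with MAX2 on its own, as the hunk above does, can only over-reserve, never under-reserve. A reduced sketch of that merge, with a simplified struct standing in for radv_cmd_buffer:

#include <stdint.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Simplified stand-in for the scratch bookkeeping in radv_cmd_buffer. */
struct scratch_state {
	uint32_t scratch_size_per_wave_needed;
	uint32_t scratch_waves_wanted;
};

/* Fold a secondary's needs into the primary: each field is merged
 * independently, so the result dominates both inputs. */
static void
merge_scratch(struct scratch_state *primary,
	      const struct scratch_state *secondary)
{
	primary->scratch_size_per_wave_needed =
		MAX2(primary->scratch_size_per_wave_needed,
		     secondary->scratch_size_per_wave_needed);
	primary->scratch_waves_wanted =
		MAX2(primary->scratch_waves_wanted,
		     secondary->scratch_waves_wanted);
}
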
@@ -4201,7 +4288,8 @@ radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
 			continue;
 
 		VkImageLayout layout = state->pass->attachments[a].final_layout;
-		struct radv_subpass_attachment att = { a, layout };
+		VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
+		struct radv_subpass_attachment att = { a, layout, stencil_layout };
 		radv_handle_subpass_image_transition(cmd_buffer, att, false);
 	}
 }
@@ -4231,10 +4319,10 @@
 	radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
 }
 
-void radv_CmdBeginRenderPass2KHR(
+void radv_CmdBeginRenderPass2(
     VkCommandBuffer                             commandBuffer,
     const VkRenderPassBeginInfo*                pRenderPassBeginInfo,
-    const VkSubpassBeginInfoKHR*                pSubpassBeginInfo)
+    const VkSubpassBeginInfo*                   pSubpassBeginInfo)
 {
 	radv_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo,
 				pSubpassBeginInfo->contents);
@@ -4251,10 +4339,10 @@
 	radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
 }
 
-void radv_CmdNextSubpass2KHR(
+void radv_CmdNextSubpass2(
     VkCommandBuffer                             commandBuffer,
-    const VkSubpassBeginInfoKHR*                pSubpassBeginInfo,
-    const VkSubpassEndInfoKHR*                  pSubpassEndInfo)
+    const VkSubpassBeginInfo*                   pSubpassBeginInfo,
+    const VkSubpassEndInfo*                     pSubpassEndInfo)
 {
 	radv_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents);
 }
@@ -4740,7 +4828,7 @@
 	radv_draw(cmd_buffer, &info);
 }
 
-void radv_CmdDrawIndirectCountKHR(
+void radv_CmdDrawIndirectCount(
 	VkCommandBuffer                             commandBuffer,
 	VkBuffer                                    _buffer,
 	VkDeviceSize                                offset,
@@ -4764,7 +4852,7 @@
 	radv_draw(cmd_buffer, &info);
 }
 
-void radv_CmdDrawIndexedIndirectCountKHR(
+void radv_CmdDrawIndexedIndirectCount(
 	VkCommandBuffer                             commandBuffer,
 	VkBuffer                                    _buffer,
 	VkDeviceSize                                offset,
@@ -4829,6 +4917,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 
 	ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
 
+	if (compute_shader->info.wave_size == 32) {
+		assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+		dispatch_initiator |= S_00B800_CS_W32_EN(1);
+	}
+
 	if (info->indirect) {
 		uint64_t va = radv_buffer_get_va(info->indirect->bo);
 
@@ -5088,9 +5181,9 @@
 	cmd_buffer->state.subpass_sample_locs = NULL;
 }
 
-void radv_CmdEndRenderPass2KHR(
+void radv_CmdEndRenderPass2(
     VkCommandBuffer                             commandBuffer,
-    const VkSubpassEndInfoKHR*                  pSubpassEndInfo)
+    const VkSubpassEndInfo*                     pSubpassEndInfo)
 {
 	radv_CmdEndRenderPass(commandBuffer);
 }
@@ -5104,19 +5197,19 @@
  */
 static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
 				  struct radv_image *image,
-				  const VkImageSubresourceRange *range,
-				  uint32_t clear_word)
+				  const VkImageSubresourceRange *range)
 {
 	assert(range->baseMipLevel == 0);
 	assert(range->levelCount == 1 || range->levelCount == VK_REMAINING_ARRAY_LAYERS);
 	VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
 	struct radv_cmd_state *state = &cmd_buffer->state;
+	uint32_t htile_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
 	VkClearDepthStencilValue value = {};
 
 	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
 			     RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
 
-	state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, clear_word);
+	state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
 
 	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
 
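The dispatch hunk above sets S_00B800_CS_W32_EN only when the compute shader was compiled for 32-wide waves, and the assert documents the invariant that wave32 code can only exist on GFX10 and later. A reduced sketch of that decision; the enum and struct here are simplified stand-ins for the radv equivalents, not the real types:

#include <assert.h>
#include <stdbool.h>

/* Simplified stand-ins for the radv chip enum and shader info. */
enum chip_class { GFX8 = 8, GFX9 = 9, GFX10 = 10 };

struct shader_info {
	unsigned wave_size; /* 32 or 64 */
};

/* Mirrors the dispatch-initiator decision: the wave32 bit may only be
 * set for shaders compiled as wave32, which the compiler can only
 * produce on GFX10+, hence the assert. */
static bool
cs_wave32_enabled(const struct shader_info *info, enum chip_class chip)
{
	if (info->wave_size != 32)
		return false;

	assert(chip >= GFX10);
	return true;
}
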
@@ -5150,18 +5243,10 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffe
 		return;
 
 	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
-
-		if (radv_layout_is_htile_compressed(image, dst_layout, dst_render_loop,
-						    dst_queue_mask)) {
-			clear_value = 0;
-		}
-
-		radv_initialize_htile(cmd_buffer, image, range, clear_value);
+		radv_initialize_htile(cmd_buffer, image, range);
 	} else if (!radv_layout_is_htile_compressed(image, src_layout, src_render_loop, src_queue_mask) &&
 	           radv_layout_is_htile_compressed(image, dst_layout, dst_render_loop, dst_queue_mask)) {
-		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
-		radv_initialize_htile(cmd_buffer, image, range, clear_value);
+		radv_initialize_htile(cmd_buffer, image, range);
 	} else if (radv_layout_is_htile_compressed(image, src_layout, src_render_loop, src_queue_mask) &&
 		   !radv_layout_is_htile_compressed(image, dst_layout, dst_render_loop, dst_queue_mask)) {
 		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
@@ -6090,6 +6175,8 @@ gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer,
 						   EOP_DST_SEL_TC_L2,
 						   EOP_DATA_SEL_GDS,
 						   va, EOP_DATA_GDS(i, 1), 0);
+
+			radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
 		}
 	}
 
@@ -6153,6 +6240,8 @@ void radv_CmdWriteBufferMarkerAMD(
 
 	si_emit_cache_flush(cmd_buffer);
 
+	ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
+
 	if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
 		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
 		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
@@ -6172,4 +6261,6 @@
 					   va, marker,
 					   cmd_buffer->gfx9_eop_bug_va);
 	}
+
+	assert(cmd_buffer->cs->cdw <= cdw_max);
 }
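The cdw_max additions in the last hunks follow the usual radeon command-stream discipline: reserve a worst-case dword count up front with radeon_check_space(), emit the packets, then assert that emission stayed within the reservation (ASSERTED keeps the variable from warning when asserts compile away in release builds). A generic standalone sketch of the reserve-then-assert pattern, with a simplified stream type standing in for radeon_cmdbuf:

#include <assert.h>
#include <stdint.h>

/* Simplified stand-in for radeon_cmdbuf: a dword buffer, a write
 * cursor, and the number of dwords known to fit. */
struct cmd_stream {
	uint32_t *buf;
	unsigned cdw;    /* dwords written so far */
	unsigned max_dw; /* dwords the buffer can hold */
};

/* Reserve-then-assert: size the reservation for the worst case before
 * emitting, then verify the emission stayed inside it afterwards. */
static void
emit_marker(struct cmd_stream *cs, uint64_t va, uint32_t marker)
{
	unsigned cdw_max = cs->cdw + 4; /* worst case for this packet */
	assert(cdw_max <= cs->max_dw);  /* a real driver would grow the buffer */

	cs->buf[cs->cdw++] = marker;
	cs->buf[cs->cdw++] = (uint32_t)va;
	cs->buf[cs->cdw++] = (uint32_t)(va >> 32);

	assert(cs->cdw <= cdw_max);
}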