X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_cmd_buffer.c;h=7454b39a9344ffe50cd9fd403726ec5b8b08fb90;hb=dbac8e25f851ed44c51f3ce8a08b2cdd564c5dd2;hp=ad83bc6c6f781a5761ddfc925ea4e133e9f89dd0;hpb=2193a6a828085e0ba63e2b768c2d86a86639831f;p=mesa.git diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ad83bc6c6f7..7454b39a934 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -421,7 +421,7 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, } static void -radv_emit_write_data_packet(struct radeon_winsys_cs *cs, uint64_t va, +radv_emit_write_data_packet(struct radeon_cmdbuf *cs, uint64_t va, unsigned count, const uint32_t *data) { radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); @@ -436,7 +436,7 @@ radv_emit_write_data_packet(struct radeon_winsys_cs *cs, uint64_t va, void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) { struct radv_device *device = cmd_buffer->device; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va; va = radv_buffer_get_va(device->trace_bo); @@ -486,7 +486,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, enum ring_type ring) { struct radv_device *device = cmd_buffer->device; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t data[2]; uint64_t va; @@ -536,7 +536,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); struct radv_device *device = cmd_buffer->device; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t data[MAX_SETS * 2] = {}; uint64_t va; unsigned i; @@ -589,7 +589,7 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, gl_shader_stage stage) { struct radv_device *device = cmd_buffer->device; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t sh_base = pipeline->user_data_0[stage]; struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs; @@ -1178,11 +1178,12 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, static void radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, - VkClearDepthStencilValue ds_clear_value) + VkClearDepthStencilValue ds_clear_value, + VkImageAspectFlags aspects) { struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; const struct radv_subpass *subpass = cmd_buffer->state.subpass; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_attachment_info *att; uint32_t att_idx; @@ -1200,18 +1201,34 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); radeon_emit(cs, ds_clear_value.stencil); radeon_emit(cs, fui(ds_clear_value.depth)); + + /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is + * only needed when clearing Z to 0.0. + */ + if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + ds_clear_value.depth == 0.0) { + VkImageLayout layout = subpass->depth_stencil_attachment.layout; + + radv_update_zrange_precision(cmd_buffer, &att->ds, image, + layout, false); + } } +/** + * Set the clear depth/stencil values to the image's metadata. + */ void -radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkClearDepthStencilValue ds_clear_value, - VkImageAspectFlags aspects) +radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkClearDepthStencilValue ds_clear_value, + VkImageAspectFlags aspects) { + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(image->bo); - va += image->offset + image->clear_value_offset; unsigned reg_offset = 0, reg_count = 0; + va += image->offset + image->clear_value_offset; + assert(radv_image_has_htile(image)); if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { @@ -1223,58 +1240,35 @@ radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ++reg_count; - radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) - radeon_emit(cmd_buffer->cs, ds_clear_value.stencil); + radeon_emit(cs, ds_clear_value.stencil); if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - radeon_emit(cmd_buffer->cs, fui(ds_clear_value.depth)); - - radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value); - - /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is - * only needed when clearing Z to 0.0. - */ - if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - ds_clear_value.depth == 0.0) { - struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - - if (!framebuffer || !subpass) - return; - - if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED) - return; - - int idx = subpass->depth_stencil_attachment.attachment; - VkImageLayout layout = subpass->depth_stencil_attachment.layout; - struct radv_attachment_info *att = &framebuffer->attachments[idx]; - struct radv_image *image = att->attachment->image; - - /* Only needed if the image is currently bound as the depth - * surface. - */ - if (att->attachment->image != image) - return; + radeon_emit(cs, fui(ds_clear_value.depth)); - radv_update_zrange_precision(cmd_buffer, &att->ds, image, - layout, false); - } + radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value, + aspects); } +/** + * Load the clear depth/stencil values from the image's metadata. + */ static void -radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image) +radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image) { + struct radeon_cmdbuf *cs = cmd_buffer->cs; VkImageAspectFlags aspects = vk_format_aspects(image->vk_format); uint64_t va = radv_buffer_get_va(image->bo); - va += image->offset + image->clear_value_offset; unsigned reg_offset = 0, reg_count = 0; + va += image->offset + image->clear_value_offset; + if (!radv_image_has_htile(image)) return; @@ -1287,17 +1281,17 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ++reg_count; - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); - radeon_emit(cmd_buffer->cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); - radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); + radeon_emit(cs, 0); - radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); } /* @@ -1337,7 +1331,7 @@ radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, { struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; const struct radv_subpass *subpass = cmd_buffer->state.subpass; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_attachment_info *att; uint32_t att_idx; @@ -1366,7 +1360,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, int cb_idx, uint32_t color_values[2]) { - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(image->bo); va += image->offset + image->clear_value_offset; @@ -1394,7 +1388,7 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, int cb_idx) { - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(image->bo); va += image->offset + image->clear_value_offset; @@ -1467,7 +1461,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; cmd_buffer->state.offset_scale = att->ds.offset_scale; } - radv_load_depth_clear_regs(cmd_buffer, image); + radv_load_ds_clear_metadata(cmd_buffer, image); } else { if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2); @@ -1492,7 +1486,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) { - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_cmd_state *state = &cmd_buffer->state; if (state->index_type != state->last_index_type) { @@ -1856,7 +1850,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw, { struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t ia_multi_vgt_param; int32_t primitive_reset_en; @@ -3093,7 +3087,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t count_va, uint32_t stride) { - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; bool draw_id_enable = radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.needs_draw_id; @@ -3181,7 +3175,7 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, { struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_winsys *ws = cmd_buffer->device->ws; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; if (info->indirect) { uint64_t va = radv_buffer_get_va(info->indirect->bo); @@ -3305,10 +3299,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, if (!cmd_buffer->device->physical_device->has_scissor_bug) return false; + uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; + + /* Index & Vertex buffer don't change context regs, and pipeline is handled later. */ + used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE); + /* Assume all state changes except these two can imply context rolls. */ - if (cmd_buffer->state.dirty & ~(RADV_CMD_DIRTY_INDEX_BUFFER | - RADV_CMD_DIRTY_VERTEX_BUFFER | - RADV_CMD_DIRTY_PIPELINE)) + if (cmd_buffer->state.dirty & used_states) return true; if (cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) @@ -3369,7 +3366,6 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && - cmd_buffer->state.pipeline && cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; MAYBE_UNUSED unsigned cdw_max = @@ -3646,7 +3642,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; struct radeon_winsys *ws = cmd_buffer->device->ws; - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_userdata_info *loc; loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, @@ -3957,7 +3953,7 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, if (vk_format_is_stencil(image->vk_format)) aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - radv_set_depth_clear_regs(cmd_buffer, image, value, aspects); + radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects); } } @@ -4205,7 +4201,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags stageMask, unsigned value) { - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(event->bo); radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo, 8); @@ -4258,7 +4254,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer, const VkImageMemoryBarrier* pImageMemoryBarriers) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radeon_winsys_cs *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; for (unsigned i = 0; i < eventCount; ++i) { RADV_FROM_HANDLE(radv_event, event, pEvents[i]);