From e6e97ea92e07b78494f08197d9d5d1f35e1c0b60 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 3 Mar 2020 15:53:20 +0100 Subject: [PATCH] radv/sqtt: describe layout transitions with user markers Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Tested-by: Marge Bot Part-of: --- src/amd/vulkan/layers/radv_sqtt_layer.c | 60 +++++++++++++++++++++++++ src/amd/vulkan/radv_cmd_buffer.c | 23 +++++++++- src/amd/vulkan/radv_meta_decompress.c | 10 +++++ src/amd/vulkan/radv_meta_fast_clear.c | 14 ++++++ src/amd/vulkan/radv_private.h | 20 +++++++++ 5 files changed, 126 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 5f61937367f..0e0551faf81 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -320,6 +320,37 @@ struct rgp_sqtt_marker_barrier_end { static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); +/** + * "Layout Transition" RGP SQTT instrumentation marker (Table 7) + */ +struct rgp_sqtt_marker_layout_transition { + union { + struct { + uint32_t identifier : 4; /* written with RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION by radv_describe_layout_transition() */ + uint32_t ext_dwords : 3; /* not assigned anywhere in this patch, so it keeps the 0 of the zero-initialised marker */ + uint32_t depth_stencil_expand : 1; + uint32_t htile_hiz_range_expand : 1; + uint32_t depth_stencil_resummarize : 1; + uint32_t dcc_decompress : 1; + uint32_t fmask_decompress : 1; + uint32_t fast_clear_eliminate : 1; + uint32_t fmask_color_expand : 1; + uint32_t init_mask_ram : 1; /* the eight 1-bit flags ending here use the same names and order as radv_barrier_data.layout_transitions */ + uint32_t reserved1 : 17; /* 4 + 3 + 8 + 17 = 32 bits, i.e. exactly the width of dword01 */ + }; + uint32_t dword01; /* the bitfields above viewed as one 32-bit word */ + }; + union { + struct { + uint32_t reserved2 : 32; /* second word carries no fields; only reserved bits */ + }; + uint32_t dword02; + }; +}; + +static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, + "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); + static void radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, enum rgp_sqtt_marker_general_api_type api_type) @@ -506,9 +537,38 @@ radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) 
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END; marker.cb_id = 0; + marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions; /* count accumulated by radv_describe_layout_transition() since the counter was last zeroed below */ + /* TODO: fill pipeline stalls, cache flushes, etc */ radv_emit_thread_trace_userdata(cs, &marker, sizeof(marker) / 4); + + cmd_buffer->state.num_layout_transitions = 0; /* reset after emitting so the next barrier-end marker starts counting from zero */ +} + +void +radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, + const struct radv_barrier_data *barrier) +{ /* Emits one layout-transition marker whose flag bits are copied from barrier->layout_transitions, then increments cmd_buffer->state.num_layout_transitions. Returns without doing anything when the device has no thread_trace_bo. */ + struct rgp_sqtt_marker_layout_transition marker = {}; /* zero-initialised: fields not assigned below stay 0 */ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + + if (likely(!cmd_buffer->device->thread_trace_bo)) + return; /* early out: neither the marker nor the counter is touched on this path */ + + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION; + marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand; + marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand; + marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize; + marker.dcc_decompress = barrier->layout_transitions.dcc_decompress; + marker.fmask_decompress = barrier->layout_transitions.fmask_decompress; + marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate; + marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand; + marker.init_mask_ram = barrier->layout_transitions.init_mask_ram; + + radv_emit_thread_trace_userdata(cs, &marker, sizeof(marker) / 4); /* last argument is sizeof(marker) in 4-byte units; the static_assert on the struct pins it to 8 bytes, so 2 */ + + cmd_buffer->state.num_layout_transitions++; /* read and zeroed by radv_describe_barrier_end() */ } #define EVENT_MARKER(cmd_name, args...) \ diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 170f5c54791..b58b3541c80 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5335,10 +5335,14 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_cmd_state *state = &cmd_buffer->state; uint32_t htile_value = vk_format_is_stencil(image->vk_format) ? 
0xfffff30f : 0xfffc000f; VkClearDepthStencilValue value = {}; + struct radv_barrier_data barrier = {}; /* every transition flag starts cleared */ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* HTILE initialisation is reported as init_mask_ram, ahead of the radv_clear_htile() call */ + state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value); state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; @@ -5396,10 +5400,14 @@ static void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer, uint32_t value) { struct radv_cmd_state *state = &cmd_buffer->state; + struct radv_barrier_data barrier = {}; /* every transition flag starts cleared */ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* CMASK initialisation is reported as init_mask_ram, ahead of the radv_clear_cmask() call */ + state->flush_bits |= radv_clear_cmask(cmd_buffer, image, range, value); state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; @@ -5418,10 +5426,14 @@ void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, }; uint32_t log2_samples = util_logbase2(image->info.samples); uint32_t value = fmask_clear_values[log2_samples]; + struct radv_barrier_data barrier = {}; /* every transition flag starts cleared */ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* FMASK initialisation is reported as init_mask_ram, ahead of the radv_clear_fmask() call */ + state->flush_bits |= radv_clear_fmask(cmd_buffer, image, range, value); state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; @@ -5432,11 +5444,15 @@ void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, const VkImageSubresourceRange *range, uint32_t value) { struct radv_cmd_state *state = &cmd_buffer->state; + struct radv_barrier_data barrier = {}; /* every transition flag starts cleared */ unsigned size = 0; state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* DCC initialisation is reported as init_mask_ram, ahead of the radv_clear_dcc() call */ + 
state->flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value); if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) { @@ -5577,8 +5593,13 @@ static void radv_handle_color_image_transition(struct radv_cmd_buffe if (fce_eliminate || fmask_expand) radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); - if (fmask_expand) + if (fmask_expand) { /* braces added so the marker and the expand call sit under the same condition */ + struct radv_barrier_data barrier = {}; + barrier.layout_transitions.fmask_color_expand = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* emitted ahead of radv_expand_fmask_image_inplace() */ + radv_expand_fmask_image_inplace(cmd_buffer, image, range); + } } } diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c index 43412f69a63..8046adabdba 100644 --- a/src/amd/vulkan/radv_meta_decompress.c +++ b/src/amd/vulkan/radv_meta_decompress.c @@ -559,6 +559,11 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, const VkImageSubresourceRange *subresourceRange, struct radv_sample_locations_state *sample_locs) { + struct radv_barrier_data barrier = {}; + + barrier.layout_transitions.depth_stencil_expand = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* emitted at function entry, before the queue-family assert and the DEPTH_DECOMPRESS pass */ + assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS); @@ -569,6 +574,11 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer, const VkImageSubresourceRange *subresourceRange, struct radv_sample_locations_state *sample_locs) { + struct radv_barrier_data barrier = {}; + + barrier.layout_transitions.depth_stencil_resummarize = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* emitted at function entry, before the queue-family assert and the DEPTH_RESUMMARIZE pass */ + assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE); diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index fc80aaf74c8..8507f41d838 100644 
--- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -783,6 +783,15 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange) { + struct radv_barrier_data barrier = {}; + + if (radv_image_has_fmask(image)) { /* exactly one flag is set: fmask_decompress for images with FMASK, fast_clear_eliminate for all others */ + barrier.layout_transitions.fmask_decompress = 1; + } else { + barrier.layout_transitions.fast_clear_eliminate = 1; + } + radv_describe_layout_transition(cmd_buffer, &barrier); /* NOTE(review): the flag is chosen only from radv_image_has_fmask(), not from what the caller asked for; confirm it matches the operation radv_emit_color_decompress() actually performs */ + radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false); } @@ -928,6 +937,11 @@ radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange) { + struct radv_barrier_data barrier = {}; + + barrier.layout_transitions.dcc_decompress = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); /* described once here, before the queue-family check that selects the decompress path below */ + if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange); else diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 42a1a43249b..924e7262f34 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1316,6 +1316,7 @@ struct radv_cmd_state { /* SQTT related state. 
*/ uint32_t current_event_type; uint32_t num_events; + uint32_t num_layout_transitions; /* incremented once per marker in radv_describe_layout_transition(), copied into the barrier-end marker and zeroed in radv_describe_barrier_end() */ }; struct radv_cmd_pool { @@ -2424,6 +2425,23 @@ int radv_dump_thread_trace(struct radv_device *device, const struct radv_thread_trace *trace); /* radv_sqtt_layer_.c */ +struct radv_barrier_data { /* Set of layout-transition flags handed to radv_describe_layout_transition(); callers in this patch zero-initialise it and set a single flag. */ + union { + struct { + uint16_t depth_stencil_expand : 1; + uint16_t htile_hiz_range_expand : 1; /* no caller in this patch sets this flag */ + uint16_t depth_stencil_resummarize : 1; + uint16_t dcc_decompress : 1; + uint16_t fmask_decompress : 1; + uint16_t fast_clear_eliminate : 1; + uint16_t fmask_color_expand : 1; + uint16_t init_mask_ram : 1; + uint16_t reserved : 8; /* pads the eight flag bits out to 16 */ + }; + uint16_t all; /* the same 16 bits viewed as a single integer */ + } layout_transitions; +}; + /** * Value for the reason field of an RGP barrier start marker originating from * the Vulkan client (does not include PAL-defined values). (Table 15) @@ -2458,6 +2476,8 @@ void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer); void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason); void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, + const struct radv_barrier_data *barrier); struct radeon_winsys_sem; -- 2.30.2