From a8a696a38f432b67f24e0373343b20404f703d0e Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 4 Apr 2018 12:12:02 +0200 Subject: [PATCH] radv: use a mask for VBOs and shaders prefetching MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Tested-by: Dieter Nützel --- src/amd/vulkan/radv_cmd_buffer.c | 75 +++++++++++++++++++++----------- src/amd/vulkan/radv_private.h | 3 +- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b18718458fe..59e122afcc9 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -37,6 +37,20 @@ #include "ac_debug.h" +enum { + RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), + RADV_PREFETCH_VS = (1 << 1), + RADV_PREFETCH_TCS = (1 << 2), + RADV_PREFETCH_TES = (1 << 3), + RADV_PREFETCH_GS = (1 << 4), + RADV_PREFETCH_PS = (1 << 5), + RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | + RADV_PREFETCH_TCS | + RADV_PREFETCH_TES | + RADV_PREFETCH_GS | + RADV_PREFETCH_PS) +}; + static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout src_layout, @@ -617,17 +631,6 @@ radv_emit_prefetch_TC_L2_async(struct radv_cmd_buffer *cmd_buffer, uint64_t va, si_cp_dma_prefetch(cmd_buffer, va, size); } -static void -radv_emit_VBO_descriptors_prefetch(struct radv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.vb_prefetch_dirty) { - radv_emit_prefetch_TC_L2_async(cmd_buffer, - cmd_buffer->state.vb_va, - cmd_buffer->state.vb_size); - cmd_buffer->state.vb_prefetch_dirty = false; - } -} - static void radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant *shader) @@ -649,18 +652,35 @@ static void radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_VERTEX]); - radv_emit_VBO_descriptors_prefetch(cmd_buffer); - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_TESS_CTRL]); - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_TESS_EVAL]); - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_GEOMETRY]); - radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader); - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_FRAGMENT]); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->prefetch_L2_mask & RADV_PREFETCH_VS) + radv_emit_shader_prefetch(cmd_buffer, + pipeline->shaders[MESA_SHADER_VERTEX]); + + if (state->prefetch_L2_mask & RADV_PREFETCH_VBO_DESCRIPTORS) + radv_emit_prefetch_TC_L2_async(cmd_buffer, state->vb_va, + state->vb_size); + + if (state->prefetch_L2_mask & RADV_PREFETCH_TCS) + radv_emit_shader_prefetch(cmd_buffer, + pipeline->shaders[MESA_SHADER_TESS_CTRL]); + + if (state->prefetch_L2_mask & RADV_PREFETCH_TES) + radv_emit_shader_prefetch(cmd_buffer, + pipeline->shaders[MESA_SHADER_TESS_EVAL]); + + if (state->prefetch_L2_mask & RADV_PREFETCH_GS) { + radv_emit_shader_prefetch(cmd_buffer, + pipeline->shaders[MESA_SHADER_GEOMETRY]); + radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader); + } + + if (state->prefetch_L2_mask & RADV_PREFETCH_PS) + radv_emit_shader_prefetch(cmd_buffer, + pipeline->shaders[MESA_SHADER_FRAGMENT]); + + state->prefetch_L2_mask = 0; } static void @@ -1504,7 +1524,7 @@ radv_cmd_buffer_update_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bo cmd_buffer->state.vb_va = va; cmd_buffer->state.vb_size = count * 16; - cmd_buffer->state.vb_prefetch_dirty = true; + cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS; } cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER; @@ -2310,6 +2330,9 @@ void radv_CmdBindPipeline( cmd_buffer->state.last_first_instance = -1; cmd_buffer->state.last_vertex_offset = -1; + /* Prefetch all pipeline shaders at first draw time. */ + cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; + radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) @@ -3053,7 +3076,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, * run in parallel, but starting the draw first is more * important. */ - if (pipeline_is_dirty) { + if (cmd_buffer->state.prefetch_L2_mask) { radv_emit_prefetch(cmd_buffer, cmd_buffer->state.pipeline); } @@ -3063,7 +3086,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, */ si_emit_cache_flush(cmd_buffer); - if (pipeline_is_dirty) { + if (cmd_buffer->state.prefetch_L2_mask) { radv_emit_prefetch(cmd_buffer, cmd_buffer->state.pipeline); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 4485efaa097..ca3beba2d41 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -915,13 +915,14 @@ struct radv_descriptor_state { struct radv_cmd_state { /* Vertex descriptors */ - bool vb_prefetch_dirty; uint64_t vb_va; unsigned vb_size; bool predicating; uint32_t dirty; + uint32_t prefetch_L2_mask; + struct radv_pipeline * pipeline; struct radv_pipeline * emitted_pipeline; struct radv_pipeline * compute_pipeline; -- 2.30.2