From dce6cb11961f8e3b9106b89205adea1ee899e599 Mon Sep 17 00:00:00 2001
From: Jonathan Marek
Date: Thu, 18 Jun 2020 13:07:48 -0400
Subject: [PATCH] turnip: use DIRTY SDS bit to avoid making copies of pipeline
 load state ib

Some testing showed that the DIRTY bit has the desired behavior, so use
it to make things a bit simpler.

Note in CmdBindPipeline, having the TU_CMD_DIRTY_DESCRIPTOR_SETS behind
an if(pipeline->layout->dynamic_offset_count) was wrong.

Signed-off-by: Jonathan Marek
Part-of:
---
 src/freedreno/vulkan/tu_cmd_buffer.c | 58 +++++++++-------------------
 src/freedreno/vulkan/tu_private.h    |  4 +-
 2 files changed, 21 insertions(+), 41 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 8fe404b5e5c..f8d19bf3a56 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -512,6 +512,19 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
       break;
    }
 
+   /* We need to reload the descriptors every time the descriptor sets
+    * change. However, the commands we send only depend on the pipeline
+    * because the whole point is to cache descriptors which are used by the
+    * pipeline. There's a problem here, in that the firmware has an
+    * "optimization" which skips executing groups that are set to the same
+    * value as the last draw. This means that if the descriptor sets change
+    * but not the pipeline, we'd try to re-execute the same buffer which
+    * the firmware would ignore and we wouldn't pre-load the new
+    * descriptors. Set the DIRTY bit to avoid this optimization
+    */
+   if (id == TU_DRAW_STATE_DESC_SETS_LOAD)
+      enable_mask |= CP_SET_DRAW_STATE__0_DIRTY;
+
    tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(state.size) |
                   enable_mask |
                   CP_SET_DRAW_STATE__0_GROUP_ID(id) |
@@ -1723,7 +1736,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
       hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
 
-      cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
+      cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD | TU_CMD_DIRTY_SHADER_CONSTS;
    } else {
       assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE);
 
@@ -2027,7 +2040,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS);
 
    cmd->state.pipeline = pipeline;
-   cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
+   cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD | TU_CMD_DIRTY_SHADER_CONSTS;
 
    struct tu_cs *cs = &cmd->draw_cs;
    uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
@@ -2041,7 +2054,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
    tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib);
    tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib);
-
    for_each_bit(i, mask)
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
 
@@ -2052,10 +2064,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    if (pipeline->vi.bindings_used & ~cmd->vertex_bindings_set)
       cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
 
-   /* If the pipeline needs a dynamic descriptor, re-emit descriptor sets */
-   if (pipeline->layout->dynamic_offset_count)
-      cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
-
    /* dynamic linewidth state depends pipeline state's gras_su_cntl
    * so the dynamic state ib must be updated when pipeline changes
    */
@@ -2955,34 +2963,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT);
    }
 
-   if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
-      /* We need to reload the descriptors every time the descriptor sets
-       * change. However, the commands we send only depend on the pipeline
-       * because the whole point is to cache descriptors which are used by the
-       * pipeline. There's a problem here, in that the firmware has an
-       * "optimization" which skips executing groups that are set to the same
-       * value as the last draw. This means that if the descriptor sets change
-       * but not the pipeline, we'd try to re-execute the same buffer which
-       * the firmware would ignore and we wouldn't pre-load the new
-       * descriptors. The blob seems to re-emit the LOAD_STATE group whenever
-       * the descriptor sets change, which we emulate here by copying the
-       * pre-prepared buffer.
-       */
-      const struct tu_cs_entry *load_entry = &pipeline->load_state.state_ib;
-      if (load_entry->size > 0) {
-         struct tu_cs load_cs;
-         result = tu_cs_begin_sub_stream(&cmd->sub_cs, load_entry->size, &load_cs);
-         if (result != VK_SUCCESS)
-            return result;
-         tu_cs_emit_array(&load_cs,
-                          (uint32_t *)((char *)load_entry->bo->map + load_entry->offset),
-                          load_entry->size / 4);
-         cmd->state.desc_sets_load_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs);
-      } else {
-         cmd->state.desc_sets_load_ib.size = 0;
-      }
-   }
-
    if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
       cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline);
 
@@ -3023,7 +3003,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
-      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state.state_ib);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
 
@@ -3041,7 +3021,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       uint32_t draw_state_count =
          has_tess +
         ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 5 : 0) +
-        ((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 1 : 0) +
+        ((cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD) ? 1 : 0) +
         ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
         1; /* vs_params */
 
@@ -3058,8 +3038,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
          tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
       }
-      if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS)
-         tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
+      if (cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD)
+         tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state.state_ib);
       if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
          tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 6e606adb9d6..c0ad2bc041e 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -707,7 +707,7 @@ enum tu_cmd_dirty_bits
 {
    TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
    TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
-   TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3,
+   TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3,
    TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4,
    TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
    /* all draw states were disabled and need to be re-enabled: */
@@ -848,7 +848,7 @@ struct tu_cmd_state
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
    struct tu_cs_entry vertex_buffers_ib;
    struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES];
-   struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
+   struct tu_cs_entry desc_sets_ib;
    struct tu_cs_entry ia_gmem_ib, ia_sysmem_ib;
 
    struct tu_draw_state vs_params;
-- 
2.30.2