turnip: use DIRTY SDS bit to avoid making copies of pipeline load state ib
authorJonathan Marek <jonathan@marek.ca>
Thu, 18 Jun 2020 17:07:48 +0000 (13:07 -0400)
committerMarge Bot <eric+marge@anholt.net>
Tue, 14 Jul 2020 17:00:07 +0000 (17:00 +0000)
Some testing showed that the DIRTY bit has the desired behavior, so use it
to make things a bit simpler.

Note in CmdBindPipeline, having the TU_CMD_DIRTY_DESCRIPTOR_SETS behind a
if(pipeline->layout->dynamic_offset_count) was wrong.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5558>

src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_private.h

index 8fe404b5e5ce66ee1f32a3feb55b37e63d726b00..f8d19bf3a569d75a98bfec2a31b837f64e15fb87 100644 (file)
@@ -512,6 +512,19 @@ tu_cs_emit_draw_state(struct tu_cs *cs, uint32_t id, struct tu_draw_state state)
       break;
    }
 
+   /* We need to reload the descriptors every time the descriptor sets
+    * change. However, the commands we send only depend on the pipeline
+    * because the whole point is to cache descriptors which are used by the
+    * pipeline. There's a problem here, in that the firmware has an
+    * "optimization" which skips executing groups that are set to the same
+    * value as the last draw. This means that if the descriptor sets change
+    * but not the pipeline, we'd try to re-execute the same buffer which
+    * the firmware would ignore and we wouldn't pre-load the new
+    * descriptors. Set the DIRTY bit to avoid this optimization
+    */
+   if (id == TU_DRAW_STATE_DESC_SETS_LOAD)
+      enable_mask |= CP_SET_DRAW_STATE__0_DIRTY;
+
    tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(state.size) |
                   enable_mask |
                   CP_SET_DRAW_STATE__0_GROUP_ID(id) |
@@ -1723,7 +1736,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
       hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
 
-      cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
+      cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD | TU_CMD_DIRTY_SHADER_CONSTS;
    } else {
       assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE);
 
@@ -2027,7 +2040,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS);
 
    cmd->state.pipeline = pipeline;
-   cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
+   cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD | TU_CMD_DIRTY_SHADER_CONSTS;
 
    struct tu_cs *cs = &cmd->draw_cs;
    uint32_t mask = ~pipeline->dynamic_state_mask & BITFIELD_MASK(TU_DYNAMIC_STATE_COUNT);
@@ -2041,7 +2054,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_RAST, pipeline->rast.state_ib);
    tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DS, pipeline->ds.state_ib);
    tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_BLEND, pipeline->blend.state_ib);
-
    for_each_bit(i, mask)
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]);
 
@@ -2052,10 +2064,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
    if (pipeline->vi.bindings_used & ~cmd->vertex_bindings_set)
       cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
 
-   /* If the pipeline needs a dynamic descriptor, re-emit descriptor sets */
-   if (pipeline->layout->dynamic_offset_count)
-      cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS;
-
    /* dynamic linewidth state depends pipeline state's gras_su_cntl
     * so the dynamic state ib must be updated when pipeline changes
     */
@@ -2955,34 +2963,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
          tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT);
    }
 
-   if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) {
-      /* We need to reload the descriptors every time the descriptor sets
-       * change. However, the commands we send only depend on the pipeline
-       * because the whole point is to cache descriptors which are used by the
-       * pipeline. There's a problem here, in that the firmware has an
-       * "optimization" which skips executing groups that are set to the same
-       * value as the last draw. This means that if the descriptor sets change
-       * but not the pipeline, we'd try to re-execute the same buffer which
-       * the firmware would ignore and we wouldn't pre-load the new
-       * descriptors. The blob seems to re-emit the LOAD_STATE group whenever
-       * the descriptor sets change, which we emulate here by copying the
-       * pre-prepared buffer.
-       */
-      const struct tu_cs_entry *load_entry = &pipeline->load_state.state_ib;
-      if (load_entry->size > 0) {
-         struct tu_cs load_cs;
-         result = tu_cs_begin_sub_stream(&cmd->sub_cs, load_entry->size, &load_cs);
-         if (result != VK_SUCCESS)
-            return result;
-         tu_cs_emit_array(&load_cs,
-                          (uint32_t *)((char  *)load_entry->bo->map + load_entry->offset),
-                          load_entry->size / 4);
-         cmd->state.desc_sets_load_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &load_cs);
-      } else {
-         cmd->state.desc_sets_load_ib.size = 0;
-      }
-   }
-
    if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
       cmd->state.vertex_buffers_ib = tu6_emit_vertex_buffers(cmd, pipeline);
 
@@ -3023,7 +3003,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets_ib);
-      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
+      tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state.state_ib);
       tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
       tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
 
@@ -3041,7 +3021,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
       uint32_t draw_state_count =
          has_tess +
          ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 5 : 0) +
-         ((cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS) ? 1 : 0) +
+         ((cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD) ? 1 : 0) +
          ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
          1; /* vs_params */
 
@@ -3058,8 +3038,8 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_GS_CONST, cmd->state.shader_const_ib[MESA_SHADER_GEOMETRY]);
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_FS_CONST, cmd->state.shader_const_ib[MESA_SHADER_FRAGMENT]);
          }
-         if (cmd->state.dirty & TU_CMD_DIRTY_DESCRIPTOR_SETS)
-            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, cmd->state.desc_sets_load_ib);
+         if (cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD)
+            tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_DESC_SETS_LOAD, pipeline->load_state.state_ib);
          if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
             tu_cs_emit_sds_ib(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers_ib);
          tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
index 6e606adb9d6084bf6124b21e503cf7c3f9e15b49..c0ad2bc041e622fa5de691f5acbac2aed69d1cba 100644 (file)
@@ -707,7 +707,7 @@ enum tu_cmd_dirty_bits
 {
    TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
    TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
-   TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3,
+   TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3,
    TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4,
    TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
    /* all draw states were disabled and need to be re-enabled: */
@@ -848,7 +848,7 @@ struct tu_cmd_state
    struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
    struct tu_cs_entry vertex_buffers_ib;
    struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES];
-   struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
+   struct tu_cs_entry desc_sets_ib;
    struct tu_cs_entry ia_gmem_ib, ia_sysmem_ib;
 
    struct tu_draw_state vs_params;