radv: Emit a BATCH_BREAK when changing pixel shaders or CB_TARGET_MASK.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 5 Jan 2020 14:03:51 +0000 (15:03 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 7 Jan 2020 21:44:31 +0000 (22:44 +0100)
Fixes a hang on Raven with Resident Evil 2.

I did not find anything more restricted to fix it:

- Setting persistent_states_per_bin to 1 fixes it too,
  but likely does an internal break on any descriptor set changes
  too.
- Only breaking the batch when cb_target_mask changes does not fix
  it (and looking at AMDVLK comments, I suspect the code in radeonsi
  should really be doing a FLUSH_DFSM).
- Always doing a FLUSH_DFSM on shader switch helps, but that is more
  often than this and I don't think we should be doing that when DFSM
  is disabled.
- Also emitting the existing break on framebuffer change when DFSM is
  disabled does not fix the issue.

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2315
CC: <mesa-stable@lists.freedesktop.org>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h

index b1d83f26da7c26a8699e061f215e839d0e568f74..85dfd266ab3a3a8894553cb38ee0c519b09cbf11 100644 (file)
@@ -1125,6 +1125,33 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
        cmd_buffer->state.context_roll_without_scissor_emitted = true;
 }
 
+static void
+radv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer)
+{ /* Emit a BREAK_BATCH event when the fragment shader or cb_target_mask differs from the last emitted pipeline. */
+       if (!cmd_buffer->device->pbb_allowed) /* nothing to do unless the device has pbb_allowed set */
+               return;
+
+        struct radv_binning_settings settings = /* same per-device values that are written into PA_SC_BINNER_CNTL_0 */
+                radv_get_binning_settings(cmd_buffer->device->physical_device);
+       bool break_for_new_ps = /* no pipeline emitted yet, or a different fragment shader ... */
+               (!cmd_buffer->state.emitted_pipeline ||
+                cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
+                cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
+               (settings.context_states_per_bin > 1 || /* ... and more than one state of either kind per bin */
+                settings.persistent_states_per_bin > 1);
+       bool break_for_new_cb_target_mask = /* no pipeline emitted yet, or a different cb_target_mask ... */
+               (!cmd_buffer->state.emitted_pipeline ||
+                cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
+                cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
+                settings.context_states_per_bin > 1; /* ... and more than one context state per bin */
+
+       if (!break_for_new_ps && !break_for_new_cb_target_mask) /* neither condition holds: emit nothing */
+               return;
+
+       radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); /* EVENT_WRITE packet header */
+       radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); /* payload: the BREAK_BATCH event */
+}
+
 static void
 radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -1157,6 +1184,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
                cmd_buffer->state.context_roll_without_scissor_emitted = true;
        }
 
+       radv_emit_batch_break_on_new_ps(cmd_buffer);
+
        for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
                if (!pipeline->shaders[i])
                        continue;
index 7be3c64fd6326af5e605f070a3083f45e5902535..4c20864f1d4383d4d1dfe9e911a8297251299034 100644 (file)
@@ -3441,6 +3441,28 @@ radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
        pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
 }
 
+struct radv_binning_settings
+radv_get_binning_settings(const struct radv_physical_device *pdev)
+{ /* Return the binner tuning values chosen from pdev->rad_info; all three fields are set on every path. */
+       struct radv_binning_settings settings;
+       if (pdev->rad_info.has_dedicated_vram) { /* dedicated VRAM: a single context/persistent state per bin */
+               settings.context_states_per_bin = 1;
+               settings.persistent_states_per_bin = 1;
+               settings.fpovs_per_batch = 63;
+       } else {
+               /* The context states are affected by the scissor bug. */
+               settings.context_states_per_bin = 6;
+               /* 32 causes hangs for RAVEN. */
+               settings.persistent_states_per_bin = 16;
+               settings.fpovs_per_batch = 63;
+       }
+
+       if (pdev->rad_info.has_gfx9_scissor_bug) /* override applied after either branch above */
+               settings.context_states_per_bin = 1;
+
+       return settings;
+}
+
 static void
 radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
                                     struct radv_pipeline *pipeline,
@@ -3459,21 +3481,8 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
                unreachable("Unhandled generation for binning bin size calculation");
 
        if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
-               unsigned context_states_per_bin; /* allowed range: [1, 6] */
-               unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
-               unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
-
-               if (pipeline->device->physical_device->rad_info.has_dedicated_vram) {
-                       context_states_per_bin = 1;
-                       persistent_states_per_bin = 1;
-                       fpovs_per_batch = 63;
-               } else {
-                       /* The context states are affected by the scissor bug. */
-                       context_states_per_bin = pipeline->device->physical_device->rad_info.has_gfx9_scissor_bug ? 1 : 6;
-                       /* 32 causes hangs for RAVEN. */
-                       persistent_states_per_bin = 16;
-                       fpovs_per_batch = 63;
-               }
+               struct radv_binning_settings settings =
+                       radv_get_binning_settings(pipeline->device->physical_device);
 
                bool disable_start_of_prim = true;
                uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
@@ -3494,10 +3503,10 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
                        S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
                        S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
                        S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
-                       S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin - 1) |
-                       S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin - 1) |
+                       S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
+                       S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
                        S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
-                       S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
+                       S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) |
                        S_028C44_OPTIMAL_BIN_SELECTION(1);
 
                pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
index 599c24b1e0f69ff741d70fe687ec077dc6130645..4dccf23acc7a3a4f63708d638bf968c06c4b24e6 100644 (file)
@@ -1685,6 +1685,15 @@ radv_graphics_pipeline_create(VkDevice device,
                              const VkAllocationCallbacks *alloc,
                              VkPipeline *pPipeline);
 
+struct radv_binning_settings { /* binner tuning values returned by radv_get_binning_settings() */
+       unsigned context_states_per_bin; /* allowed range: [1, 6] */
+       unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+       unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+};
+
+struct radv_binning_settings
+radv_get_binning_settings(const struct radv_physical_device *pdev); /* called from both binning-state generation and the batch-break check */
+
 struct vk_format_description;
 uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc,
                                          int first_non_void);