radv: improve reporting faulty pipelines when a GPU hang is detected
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 26 Aug 2020 15:34:28 +0000 (17:34 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 1 Sep 2020 06:27:48 +0000 (08:27 +0200)
Because the driver now waits for idle after every draw/dispatch
call, we shouldn't report gfx pipelines when the GPU hang happens
after a dispatch (or compute pipelines when it happens after a draw).

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6471>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_debug.c

index 873549677c505d4be9f0ee546d02d0e7969916cd..792462ed9e2764793e0ddc4d3105e7b5e5953e6f 100644 (file)
@@ -626,14 +626,17 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
 
 static void
 radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
-                  struct radv_pipeline *pipeline, enum ring_type ring)
+                  struct radv_pipeline *pipeline)
 {
        struct radv_device *device = cmd_buffer->device;
+       enum ring_type ring;
        uint32_t data[2];
        uint64_t va;
 
        va = radv_buffer_get_va(device->trace_bo);
 
+       ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+
        switch (ring) {
        case RING_GFX:
                va += 8;
@@ -1313,7 +1316,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
                                   pipeline->gs_copy_shader->bo);
 
        if (unlikely(cmd_buffer->device->trace_bo))
-               radv_save_pipeline(cmd_buffer, pipeline, RING_GFX);
+               radv_save_pipeline(cmd_buffer, pipeline);
 
        cmd_buffer->state.emitted_pipeline = pipeline;
 
@@ -4174,7 +4177,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
                           pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
 
        if (unlikely(cmd_buffer->device->trace_bo))
-               radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
+               radv_save_pipeline(cmd_buffer, pipeline);
 }
 
 static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,
index d50480aa47ffbace3cc6ef4a8b0abcf0e8786d06..5bee83021f661dd61f117f3b3fe9c278adc34b84 100644 (file)
@@ -48,8 +48,8 @@
  *
  * [0]: primary trace ID
  * [1]: secondary trace ID
- * [2-3]: 64-bit GFX pipeline pointer
- * [4-5]: 64-bit COMPUTE pipeline pointer
+ * [2-3]: 64-bit GFX ring pipeline pointer
+ * [4-5]: 64-bit COMPUTE ring pipeline pointer
  * [6-7]: 64-bit descriptor set #0 pointer
  * ...
  * [68-69]: 64-bit descriptor set #31 pointer
@@ -459,61 +459,29 @@ radv_dump_shaders(struct radv_pipeline *pipeline,
        }
 }
 
-static void
-radv_dump_pipeline_state(struct radv_pipeline *pipeline,
-                        VkShaderStageFlagBits active_stages, FILE *f)
-{
-       radv_dump_shaders(pipeline, active_stages, f);
-       radv_dump_annotated_shaders(pipeline, active_stages, f);
-}
-
-static void
-radv_dump_graphics_state(struct radv_device *device,
-                        struct radv_pipeline *graphics_pipeline,
-                        struct radv_pipeline *compute_pipeline, FILE *f)
-{
-       VkShaderStageFlagBits active_stages;
-
-       if (graphics_pipeline) {
-               active_stages = graphics_pipeline->active_stages;
-               radv_dump_pipeline_state(graphics_pipeline, active_stages, f);
-       }
-
-       if (compute_pipeline) {
-               active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
-               radv_dump_pipeline_state(compute_pipeline, active_stages, f);
-       }
-
-       radv_dump_descriptors(device, f);
-}
-
-static void
-radv_dump_compute_state(struct radv_device *device,
-                       struct radv_pipeline *compute_pipeline, FILE *f)
-{
-       VkShaderStageFlagBits active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
-
-       if (!compute_pipeline)
-               return;
-
-       radv_dump_pipeline_state(compute_pipeline, active_stages, f);
-       radv_dump_descriptors(device, f);
-}
-
 static struct radv_pipeline *
-radv_get_saved_graphics_pipeline(struct radv_device *device)
+radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
 {
        uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+       int offset = ring == RING_GFX ? 1 : 2;
 
-       return *(struct radv_pipeline **)(ptr + 1);
+       return *(struct radv_pipeline **)(ptr + offset);
 }
 
-static struct radv_pipeline *
-radv_get_saved_compute_pipeline(struct radv_device *device)
+static void
+radv_dump_queue_state(struct radv_queue *queue, FILE *f)
 {
-       uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+       enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+       struct radv_pipeline *pipeline;
 
-       return *(struct radv_pipeline **)(ptr + 2);
+       fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
+
+       pipeline = radv_get_saved_pipeline(queue->device, ring);
+       if (pipeline) {
+               radv_dump_shaders(pipeline, pipeline->active_stages, f);
+               radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
+               radv_dump_descriptors(queue->device, f);
+       }
 }
 
 static void
@@ -596,7 +564,6 @@ radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
 void
 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
 {
-       struct radv_pipeline *graphics_pipeline, *compute_pipeline;
        struct radv_device *device = queue->device;
        enum ring_type ring;
        uint64_t addr;
@@ -611,9 +578,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
        if (!hang_occurred && !vm_fault_occurred)
                return;
 
-       graphics_pipeline = radv_get_saved_graphics_pipeline(device);
-       compute_pipeline = radv_get_saved_compute_pipeline(device);
-
        radv_dump_trace(queue->device, cs);
 
        fprintf(stderr, "GPU hang report:\n\n");
@@ -628,23 +592,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
        }
 
        radv_dump_debug_registers(device, stderr);
-
-       switch (ring) {
-       case RING_GFX:
-               fprintf(stderr, "RING_GFX:\n");
-               radv_dump_graphics_state(queue->device,
-                                        graphics_pipeline, compute_pipeline,
-                                        stderr);
-               break;
-       case RING_COMPUTE:
-               fprintf(stderr, "RING_COMPUTE:\n");
-               radv_dump_compute_state(queue->device,
-                                       compute_pipeline, stderr);
-               break;
-       default:
-               assert(0);
-               break;
-       }
+       radv_dump_queue_state(queue, stderr);
 
        abort();
 }