From aa675cdc91fe1d317650c279b3470c0081e85527 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 26 Aug 2020 17:34:28 +0200 Subject: [PATCH] radv: improve reporting faulty pipelines when a GPU hang is detected Because the driver now waits for idle after every draw/dispatch call, we shouldn't report gfx pipelines when the GPU hang happens after a dispatch (or the opposite). Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 9 ++-- src/amd/vulkan/radv_debug.c | 88 +++++++------------------------- 2 files changed, 24 insertions(+), 73 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 873549677c5..792462ed9e2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -626,14 +626,17 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, static void radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, enum ring_type ring) + struct radv_pipeline *pipeline) { struct radv_device *device = cmd_buffer->device; + enum ring_type ring; uint32_t data[2]; uint64_t va; va = radv_buffer_get_va(device->trace_bo); + ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); + switch (ring) { case RING_GFX: va += 8; @@ -1313,7 +1316,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) pipeline->gs_copy_shader->bo); if (unlikely(cmd_buffer->device->trace_bo)) - radv_save_pipeline(cmd_buffer, pipeline, RING_GFX); + radv_save_pipeline(cmd_buffer, pipeline); cmd_buffer->state.emitted_pipeline = pipeline; @@ -4174,7 +4177,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) pipeline->shaders[MESA_SHADER_COMPUTE]->bo); if (unlikely(cmd_buffer->device->trace_bo)) - radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE); + radv_save_pipeline(cmd_buffer, pipeline); } static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, diff --git 
a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index d50480aa47f..5bee83021f6 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -48,8 +48,8 @@ * * [0]: primary trace ID * [1]: secondary trace ID - * [2-3]: 64-bit GFX pipeline pointer - * [4-5]: 64-bit COMPUTE pipeline pointer + * [2-3]: 64-bit GFX ring pipeline pointer + * [4-5]: 64-bit COMPUTE ring pipeline pointer * [6-7]: 64-bit descriptor set #0 pointer * ... * [68-69]: 64-bit descriptor set #31 pointer @@ -459,61 +459,29 @@ radv_dump_shaders(struct radv_pipeline *pipeline, } } -static void -radv_dump_pipeline_state(struct radv_pipeline *pipeline, - VkShaderStageFlagBits active_stages, FILE *f) -{ - radv_dump_shaders(pipeline, active_stages, f); - radv_dump_annotated_shaders(pipeline, active_stages, f); -} - -static void -radv_dump_graphics_state(struct radv_device *device, - struct radv_pipeline *graphics_pipeline, - struct radv_pipeline *compute_pipeline, FILE *f) -{ - VkShaderStageFlagBits active_stages; - - if (graphics_pipeline) { - active_stages = graphics_pipeline->active_stages; - radv_dump_pipeline_state(graphics_pipeline, active_stages, f); - } - - if (compute_pipeline) { - active_stages = VK_SHADER_STAGE_COMPUTE_BIT; - radv_dump_pipeline_state(compute_pipeline, active_stages, f); - } - - radv_dump_descriptors(device, f); -} - -static void -radv_dump_compute_state(struct radv_device *device, - struct radv_pipeline *compute_pipeline, FILE *f) -{ - VkShaderStageFlagBits active_stages = VK_SHADER_STAGE_COMPUTE_BIT; - - if (!compute_pipeline) - return; - - radv_dump_pipeline_state(compute_pipeline, active_stages, f); - radv_dump_descriptors(device, f); -} - static struct radv_pipeline * -radv_get_saved_graphics_pipeline(struct radv_device *device) +radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring) { uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + int offset = ring == RING_GFX ? 
1 : 2; - return *(struct radv_pipeline **)(ptr + 1); + return *(struct radv_pipeline **)(ptr + offset); } -static struct radv_pipeline * -radv_get_saved_compute_pipeline(struct radv_device *device) +static void +radv_dump_queue_state(struct radv_queue *queue, FILE *f) { - uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); + struct radv_pipeline *pipeline; - return *(struct radv_pipeline **)(ptr + 2); + fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE"); + + pipeline = radv_get_saved_pipeline(queue->device, ring); + if (pipeline) { + radv_dump_shaders(pipeline, pipeline->active_stages, f); + radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f); + radv_dump_descriptors(queue->device, f); + } } static void @@ -596,7 +564,6 @@ radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring) void radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) { - struct radv_pipeline *graphics_pipeline, *compute_pipeline; struct radv_device *device = queue->device; enum ring_type ring; uint64_t addr; @@ -611,9 +578,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) if (!hang_occurred && !vm_fault_occurred) return; - graphics_pipeline = radv_get_saved_graphics_pipeline(device); - compute_pipeline = radv_get_saved_compute_pipeline(device); - radv_dump_trace(queue->device, cs); fprintf(stderr, "GPU hang report:\n\n"); @@ -628,23 +592,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) } radv_dump_debug_registers(device, stderr); - - switch (ring) { - case RING_GFX: - fprintf(stderr, "RING_GFX:\n"); - radv_dump_graphics_state(queue->device, - graphics_pipeline, compute_pipeline, - stderr); - break; - case RING_COMPUTE: - fprintf(stderr, "RING_COMPUTE:\n"); - radv_dump_compute_state(queue->device, - compute_pipeline, stderr); - break; - default: - assert(0); - break; - } + radv_dump_queue_state(queue, stderr); 
abort(); } -- 2.30.2