radv: dump shader stats when a hang occured
[mesa.git] / src / amd / vulkan / radv_debug.c
index 9913c06ec2375039b1ca324abd0c19efddcf3880..ee4c86e7ed3d89248919c15d5811897976abda4a 100644 (file)
 #include <stdlib.h>
 #include <stdio.h>
 
+#include "ac_debug.h"
 #include "radv_debug.h"
+#include "radv_shader.h"
+
+#define TRACE_BO_SIZE 4096
+
+/* Trace BO layout (offsets are 4 bytes):
+ *
+ * [0]: primary trace ID
+ * [1]: secondary trace ID
+ * [2-3]: 64-bit GFX pipeline pointer
+ * [4-5]: 64-bit COMPUTE pipeline pointer
+ */
 
 bool
 radv_init_trace(struct radv_device *device)
 {
        struct radeon_winsys *ws = device->ws;
 
-       device->trace_bo = ws->buffer_create(ws, 4096, 8, RADEON_DOMAIN_VRAM,
+       device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
+                                            RADEON_DOMAIN_VRAM,
                                             RADEON_FLAG_CPU_ACCESS);
        if (!device->trace_bo)
                return false;
@@ -44,10 +57,15 @@ radv_init_trace(struct radv_device *device)
        if (!device->trace_id_ptr)
                return false;
 
+       memset(device->trace_id_ptr, 0, TRACE_BO_SIZE);
+
+       ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+                           &device->dmesg_timestamp, NULL);
+
        return true;
 }
 
-void
+static void
 radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs)
 {
        const char *filename = getenv("RADV_TRACE_FILE");
@@ -63,6 +81,130 @@ radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs)
        fclose(f);
 }
 
+static void
+radv_dump_shader(struct radv_pipeline *pipeline,
+                struct radv_shader_variant *shader, gl_shader_stage stage,
+                FILE *f)
+{
+       if (!shader)
+               return;
+
+       fprintf(f, "%s:\n%s\n\n", radv_get_shader_name(shader, stage),
+               shader->disasm_string);
+
+       radv_shader_dump_stats(pipeline->device, shader, stage, f);
+}
+
+static void
+radv_dump_shaders(struct radv_pipeline *pipeline,
+                 struct radv_shader_variant *compute_shader, FILE *f)
+{
+       unsigned mask;
+
+       /* Dump active graphics shaders. */
+       mask = pipeline->active_stages;
+       while (mask) {
+               int stage = u_bit_scan(&mask);
+
+               radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
+       }
+
+       radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f);
+}
+
+static void
+radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline,
+                        struct radv_pipeline *compute_pipeline, FILE *f)
+{
+       struct radv_shader_variant *compute_shader =
+               compute_pipeline ? compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL;
+
+       if (!graphics_pipeline)
+               return;
+
+       radv_dump_shaders(graphics_pipeline, compute_shader, f);
+}
+
+static void
+radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f)
+{
+       if (!compute_pipeline)
+               return;
+
+       radv_dump_shaders(compute_pipeline,
+                         compute_pipeline->shaders[MESA_SHADER_COMPUTE], f);
+}
+
+static struct radv_pipeline *
+radv_get_saved_graphics_pipeline(struct radv_device *device)
+{
+       uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+
+       return (struct radv_pipeline *)ptr[1];
+}
+
+static struct radv_pipeline *
+radv_get_saved_compute_pipeline(struct radv_device *device)
+{
+       uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+
+       return (struct radv_pipeline *)ptr[2];
+}
+
+static bool
+radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
+{
+       struct radeon_winsys *ws = queue->device->ws;
+
+       if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
+               return true;
+
+       return false;
+}
+
+void
+radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs)
+{
+       struct radv_pipeline *graphics_pipeline, *compute_pipeline;
+       struct radv_device *device = queue->device;
+       enum ring_type ring;
+       uint64_t addr;
+
+       ring = radv_queue_family_to_ring(queue->queue_family_index);
+
+       bool hang_occurred = radv_gpu_hang_occured(queue, ring);
+       bool vm_fault_occurred = false;
+       if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
+               vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+                                                       &device->dmesg_timestamp, &addr);
+       if (!hang_occurred && !vm_fault_occurred)
+               return;
+
+       graphics_pipeline = radv_get_saved_graphics_pipeline(device);
+       compute_pipeline = radv_get_saved_compute_pipeline(device);
+
+       if (vm_fault_occurred) {
+               fprintf(stderr, "VM fault report.\n\n");
+               fprintf(stderr, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
+       }
+
+       switch (ring) {
+       case RING_GFX:
+               radv_dump_graphics_state(graphics_pipeline, compute_pipeline,
+                                        stderr);
+               break;
+       case RING_COMPUTE:
+               radv_dump_compute_state(compute_pipeline, stderr);
+               break;
+       default:
+               assert(0);
+               break;
+       }
+
+       radv_dump_trace(queue->device, cs);
+       abort();
+}
+
 void
 radv_print_spirv(struct radv_shader_module *module, FILE *fp)
 {