From af3230e39e9a4fe848e8c859095db8dab6869ccf Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 18 Aug 2020 18:51:46 +0200 Subject: [PATCH] radv: add initial trap handler support with RADV_TRAP_HANDLER=1 A trap handler is used to handle shader exceptions like memory violations, divide by zero etc. The trap handler shader code will help to identify the faulty shader/instruction and to report more information for better debugging. This has only been tested on GFX8, though it should work on GFX6-GFX7. It seems we need a different implementation for GFX9+. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_debug.c | 56 +++++++++++++++++++++++++++++++ src/amd/vulkan/radv_debug.h | 3 ++ src/amd/vulkan/radv_device.c | 62 +++++++++++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 5 +++ 4 files changed, 126 insertions(+) diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index 47d70b96499..6ce76b77eaa 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -36,6 +36,7 @@ #include "radv_shader.h" #define TRACE_BO_SIZE 4096 +#define TMA_BO_SIZE 4096 #define COLOR_RESET "\033[0m" #define COLOR_RED "\033[31m" @@ -678,3 +679,58 @@ fail: close(fd); unlink(path); } + +bool +radv_trap_handler_init(struct radv_device *device) +{ + struct radeon_winsys *ws = device->ws; + + /* Create the trap handler shader and upload it like other shaders. 
*/ + device->trap_handler_shader = radv_create_trap_handler_shader(device); + if (!device->trap_handler_shader) { + fprintf(stderr, "radv: failed to create the trap handler shader.\n"); + return false; + } + + device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 8, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING | + RADEON_FLAG_ZERO_VRAM, + RADV_BO_PRIORITY_SCRATCH); + if (!device->tma_bo) + return false; + + device->tma_ptr = ws->buffer_map(device->tma_bo); + if (!device->tma_ptr) + return false; + + /* Upload a buffer descriptor to store various info from the trap. */ + uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16; + uint32_t desc[4]; + + desc[0] = tma_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32); + desc[2] = TMA_BO_SIZE; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + memcpy(device->tma_ptr, desc, sizeof(desc)); + + return true; +} + +void +radv_trap_handler_finish(struct radv_device *device) +{ + struct radeon_winsys *ws = device->ws; + + if (unlikely(device->trap_handler_shader)) + radv_shader_variant_destroy(device, device->trap_handler_shader); + + if (unlikely(device->tma_bo)) + ws->buffer_destroy(device->tma_bo); +} diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 2e7c4694a15..787597f5da9 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -82,4 +82,7 @@ radv_print_spirv(const char *data, uint32_t size, FILE *fp); void radv_dump_enabled_options(struct radv_device *device, FILE *f); +bool radv_trap_handler_init(struct radv_device *device); +void radv_trap_handler_finish(struct radv_device *device); + #endif diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 5b93083913b..68c0ccc2b37 100644 --- a/src/amd/vulkan/radv_device.c +++ 
b/src/amd/vulkan/radv_device.c @@ -2817,6 +2817,19 @@ VkResult radv_CreateDevice( goto fail; } + if (getenv("RADV_TRAP_HANDLER")) { + /* TODO: Add support for more hardware. */ + assert(device->physical_device->rad_info.chip_class == GFX8); + + /* To get the disassembly of the faulty shaders, we have to + * keep some shader info around. + */ + keep_shader_info = true; + + if (!radv_trap_handler_init(device)) + goto fail; + } + device->keep_shader_info = keep_shader_info; result = radv_device_init_meta(device); if (result != VK_SUCCESS) @@ -2893,6 +2906,8 @@ fail: radv_thread_trace_finish(device); + radv_trap_handler_finish(device); + if (device->trace_bo) device->ws->buffer_destroy(device->trace_bo); @@ -2942,6 +2957,8 @@ void radv_DestroyDevice( VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); + radv_trap_handler_finish(device); + radv_destroy_shader_slabs(device); pthread_cond_destroy(&device->timeline_cond); @@ -3420,6 +3437,50 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, } } +static void +radv_emit_trap_handler(struct radv_queue *queue, + struct radeon_cmdbuf *cs, + struct radeon_winsys_bo *tma_bo) +{ + struct radv_device *device = queue->device; + struct radeon_winsys_bo *tba_bo; + uint64_t tba_va, tma_va; + + if (!device->trap_handler_shader || !tma_bo) + return; + + tba_bo = device->trap_handler_shader->bo; + + tba_va = radv_buffer_get_va(tba_bo) + device->trap_handler_shader->bo_offset; + tma_va = radv_buffer_get_va(tma_bo); + + radv_cs_add_buffer(queue->device->ws, cs, tba_bo); + radv_cs_add_buffer(queue->device->ws, cs, tma_bo); + + if (queue->queue_family_index == RADV_QUEUE_GENERAL) { + uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, + R_00B100_SPI_SHADER_TBA_LO_VS, + R_00B200_SPI_SHADER_TBA_LO_GS, + R_00B300_SPI_SHADER_TBA_LO_ES, + R_00B400_SPI_SHADER_TBA_LO_HS, + R_00B500_SPI_SHADER_TBA_LO_LS}; + + for (int i = 0; i < ARRAY_SIZE(regs); ++i) { + 
radeon_set_sh_reg_seq(cs, regs[i], 4); + radeon_emit(cs, tba_va >> 8); + radeon_emit(cs, tba_va >> 40); + radeon_emit(cs, tma_va >> 8); + radeon_emit(cs, tma_va >> 40); + } + } else { + radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4); + radeon_emit(cs, tba_va >> 8); + radeon_emit(cs, tba_va >> 40); + radeon_emit(cs, tma_va >> 8); + radeon_emit(cs, tma_va >> 40); + } +} + static void radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue) { @@ -3724,6 +3785,7 @@ radv_get_preamble_cs(struct radv_queue *queue, compute_scratch_waves, compute_scratch_bo); radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo); + radv_emit_trap_handler(queue, cs, queue->device->tma_bo); if (gds_bo) radv_cs_add_buffer(queue->device->ws, cs, gds_bo); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 1f5c4403b4e..e8383c910b2 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -847,6 +847,11 @@ struct radv_device { uint32_t thread_trace_buffer_size; int thread_trace_start_frame; + /* Trap handler. */ + struct radv_shader_variant *trap_handler_shader; + struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */ + uint32_t *tma_ptr; + /* Overallocation. */ bool overallocation_disallowed; uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS]; -- 2.30.2