From: Samuel Pitoiset Date: Thu, 20 Feb 2020 12:22:31 +0000 (+0100) Subject: radv: allow to capture SQTT traces with RADV_THREAD_TRACE= X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bf16ff317256c208e21362191bb93200925ea944;p=mesa.git radv: allow to capture SQTT traces with RADV_THREAD_TRACE= This is pretty basic (and a bit crappy at the moment). I think we might want some sort of overlay in the future and also be able to trigger captures with F12 or whatever. To record a capture, set RADV_THREAD_TRACE to something greater than zero (e.g. RADV_THREAD_TRACE=100 will capture frame #100). If the driver didn't crash (or the GPU didn't hang), the capture file should be stored in /tmp. To open that capture, use Radeon GPU Profiler and enjoy your profiling times with RADV! \o/ Note that thread trace support is quite experimental, only GFX9 is supported at the moment, and a bunch of useful stuff is still missing (shader ISA, pipelines info, etc). More is coming soon. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Tested-by: Marge Bot Part-of: --- diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 04fdb462a92..0ffcf8bd49c 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2969,6 +2969,24 @@ VkResult radv_CreateDevice( radv_dump_enabled_options(device, stderr); } + int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1); + if (radv_thread_trace >= 0) { + fprintf(stderr, "*****************************************************************************\n"); + fprintf(stderr, "* WARNING: Thread trace support is experimental and only supported on GFX9! *\n"); + fprintf(stderr, "*****************************************************************************\n"); + + /* TODO: add support for more ASICs. */ + assert(device->physical_device->rad_info.chip_class == GFX9); + + /* Default buffer size set to 1MB per SE. 
*/ + device->thread_trace_buffer_size = + radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024); + device->thread_trace_start_frame = radv_thread_trace; + + if (!radv_thread_trace_init(device)) + goto fail; + } + /* Temporarily disable secure compile while we create meta shaders, etc */ uint8_t sc_threads = device->instance->num_sc_threads; if (sc_threads) @@ -3044,6 +3062,8 @@ fail_meta: fail: radv_bo_list_finish(&device->bo_list); + radv_thread_trace_finish(device); + if (device->trace_bo) device->ws->buffer_destroy(device->trace_bo); @@ -3093,6 +3113,9 @@ void radv_DestroyDevice( pthread_cond_destroy(&device->timeline_cond); radv_bo_list_finish(&device->bo_list); + + radv_thread_trace_finish(device); + if (radv_device_use_secure_compile(device->instance)) { for (unsigned i = 0; i < device->instance->num_sc_threads; i++ ) { destroy_secure_compile_device(device, i); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 7c4032d1ec6..1f776c8f580 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -859,6 +859,7 @@ struct radv_device { struct radeon_winsys_bo *thread_trace_bo; void *thread_trace_ptr; uint32_t thread_trace_buffer_size; + int thread_trace_start_frame; }; struct radv_device_memory { diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index a2b0afa48c3..2da2049feb4 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -269,16 +269,55 @@ VkResult radv_AcquireNextImage2KHR( return result; } +/* TODO: Improve the way to trigger capture (overlay, etc). */ +static void +radv_handle_thread_trace(VkQueue _queue) +{ + RADV_FROM_HANDLE(radv_queue, queue, _queue); + static bool thread_trace_enabled = false; + static uint64_t num_frames = 0; + + if (thread_trace_enabled) { + struct radv_thread_trace thread_trace = {}; + + radv_end_thread_trace(queue); + thread_trace_enabled = false; + + /* TODO: Do something better than this whole sync. 
*/ + radv_QueueWaitIdle(_queue); + + if (radv_get_thread_trace(queue, &thread_trace)) + radv_dump_thread_trace(queue->device, &thread_trace); + } else { + if (num_frames == queue->device->thread_trace_start_frame) { + radv_begin_thread_trace(queue); + assert(!thread_trace_enabled); + thread_trace_enabled = true; + } + } + num_frames++; +} + VkResult radv_QueuePresentKHR( VkQueue _queue, const VkPresentInfoKHR* pPresentInfo) { RADV_FROM_HANDLE(radv_queue, queue, _queue); - return wsi_common_queue_present(&queue->device->physical_device->wsi_device, - radv_device_to_handle(queue->device), - _queue, - queue->queue_family_index, - pPresentInfo); + VkResult result; + + result = wsi_common_queue_present(&queue->device->physical_device->wsi_device, + radv_device_to_handle(queue->device), + _queue, + queue->queue_family_index, + pPresentInfo); + if (result != VK_SUCCESS) + return result; + + if (unlikely(queue->device->thread_trace_bo)) { + radv_handle_thread_trace(_queue); + } + + return VK_SUCCESS; }