radv: allow to capture SQTT traces with RADV_THREAD_TRACE=<start_frame>
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 20 Feb 2020 12:22:31 +0000 (13:22 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 28 Feb 2020 07:11:11 +0000 (08:11 +0100)
This is pretty basic (and a bit crappy at the moment). I think we
might want some sort of overlay in the future and also be able to
trigger captures with F12 or whatever.

To record a capture, set RADV_THREAD_TRACE to something greater than
zero (e.g. RADV_THREAD_TRACE=100 will capture frame #100). If the
driver didn't crash (or the GPU didn't hang), the capture file
should be stored in /tmp.

To open that capture, use Radeon GPU Profiler and enjoy your
profiling times with RADV! \o/

Note that thread trace support is quite experimental, only GFX9 is
supported at the moment, and a bunch of useful stuff is still missing
(shader ISA, pipelines info, etc). More is coming soon.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3900>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3900>

src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_wsi.c

index 04fdb462a92c6fb270b734b31fed09e4b4ce49fe..0ffcf8bd49cfba0e48d2c05aa642eef1c7a21752 100644 (file)
@@ -2969,6 +2969,24 @@ VkResult radv_CreateDevice(
                radv_dump_enabled_options(device, stderr);
        }
 
+       int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
+       if (radv_thread_trace >= 0) {
+               fprintf(stderr, "*****************************************************************************\n");
+               fprintf(stderr, "* WARNING: Thread trace support is experimental and only supported on GFX9! *\n");
+               fprintf(stderr, "*****************************************************************************\n");
+
+               /* TODO: add support for more ASICs. */
+               assert(device->physical_device->rad_info.chip_class == GFX9);
+
+               /* Default buffer size set to 1MB per SE. */
+               device->thread_trace_buffer_size =
+                       radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
+               device->thread_trace_start_frame = radv_thread_trace;
+
+               if (!radv_thread_trace_init(device))
+                       goto fail;
+       }
+
        /* Temporarily disable secure compile while we create meta shaders, etc */
        uint8_t sc_threads = device->instance->num_sc_threads;
        if (sc_threads)
@@ -3044,6 +3062,8 @@ fail_meta:
 fail:
        radv_bo_list_finish(&device->bo_list);
 
+       radv_thread_trace_finish(device);
+
        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);
 
@@ -3093,6 +3113,9 @@ void radv_DestroyDevice(
 
        pthread_cond_destroy(&device->timeline_cond);
        radv_bo_list_finish(&device->bo_list);
+
+       radv_thread_trace_finish(device);
+
        if (radv_device_use_secure_compile(device->instance)) {
                for (unsigned i = 0; i < device->instance->num_sc_threads; i++ ) {
                        destroy_secure_compile_device(device, i);
index 7c4032d1ec640b8761aec3f79096b7742c7014d0..1f776c8f5805100101829f62c96b307704402e62 100644 (file)
@@ -859,6 +859,7 @@ struct radv_device {
        struct radeon_winsys_bo *thread_trace_bo;
        void *thread_trace_ptr;
        uint32_t thread_trace_buffer_size;
+       int thread_trace_start_frame;
 };
 
 struct radv_device_memory {
index a2b0afa48c3a6f324367ecf4d9105725acd8cb6c..2da2049feb4119eda5f914a3a83737861aeb01c2 100644 (file)
@@ -269,16 +269,55 @@ VkResult radv_AcquireNextImage2KHR(
        return result;
 }
 
+/* TODO: Improve the way to trigger capture (overlay, etc). */
+static void
+radv_handle_thread_trace(VkQueue _queue)
+{
+       RADV_FROM_HANDLE(radv_queue, queue, _queue);
+       static bool thread_trace_enabled = false;
+       static uint64_t num_frames = 0;
+
+       if (thread_trace_enabled) {
+               struct radv_thread_trace thread_trace = {};
+
+               radv_end_thread_trace(queue);
+               thread_trace_enabled = false;
+
+               /* TODO: Do something better than this whole sync. */
+               radv_QueueWaitIdle(_queue);
+
+               if (radv_get_thread_trace(queue, &thread_trace))
+                       radv_dump_thread_trace(queue->device, &thread_trace);
+       } else {
+               if (num_frames == queue->device->thread_trace_start_frame) {
+                       radv_begin_thread_trace(queue);
+                       assert(!thread_trace_enabled);
+                       thread_trace_enabled = true;
+               }
+       }
+       num_frames++;
+}
+
 VkResult radv_QueuePresentKHR(
        VkQueue                                  _queue,
        const VkPresentInfoKHR*                  pPresentInfo)
 {
        RADV_FROM_HANDLE(radv_queue, queue, _queue);
-       return wsi_common_queue_present(&queue->device->physical_device->wsi_device,
-                                       radv_device_to_handle(queue->device),
-                                       _queue,
-                                       queue->queue_family_index,
-                                       pPresentInfo);
+       VkResult result;
+
+       result = wsi_common_queue_present(&queue->device->physical_device->wsi_device,
+                                         radv_device_to_handle(queue->device),
+                                         _queue,
+                                         queue->queue_family_index,
+                                         pPresentInfo);
+       if (result != VK_SUCCESS)
+               return result;
+
+       if (unlikely(queue->device->thread_trace_bo)) {
+               radv_handle_thread_trace(_queue);
+       }
+
+       return VK_SUCCESS;
 }