radv/winsys: Make WaitIdle queue aware.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 17 Dec 2016 18:10:35 +0000 (19:10 +0100)
committer: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 18 Dec 2016 19:52:20 +0000 (20:52 +0100)
Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c

index 0cac5bc989311e1742a92001ab02abb13442c5c4..cc89387ff70527f201901373041f21e9da6e4d7c 100644 (file)
@@ -879,7 +879,9 @@ VkResult radv_QueueWaitIdle(
 {
        RADV_FROM_HANDLE(radv_queue, queue, _queue);
 
-       queue->device->ws->ctx_wait_idle(queue->device->hw_ctx);
+       queue->device->ws->ctx_wait_idle(queue->device->hw_ctx,
+                                        radv_queue_family_to_ring(queue->queue_family_index),
+                                        queue->queue_idx);
        return VK_SUCCESS;
 }
 
@@ -888,7 +890,11 @@ VkResult radv_DeviceWaitIdle(
 {
        RADV_FROM_HANDLE(radv_device, device, _device);
 
-       device->ws->ctx_wait_idle(device->hw_ctx);
+       for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+               for (unsigned q = 0; q < device->queue_count[i]; q++) {
+                       radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
+               }
+       }
        return VK_SUCCESS;
 }
 
index db7650c9dbf1c11d0a98d83c20e24d030f9d5ea8..f29071be941b692f91d8a0523bff3333f9048027 100644 (file)
@@ -286,7 +286,8 @@ struct radeon_winsys {
        struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
        void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
 
-       bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx);
+       bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
+                             enum ring_type ring_type, int ring_index);
 
        struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
                                              enum ring_type ring_type);
index 325458f41bf777487acf520491ca0cfc5579e21e..fc02d4926396f7209d2697f5f10097bb7acb57a7 100644 (file)
@@ -501,6 +501,14 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
        return r;
 }
 
+static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
+                                   struct amdgpu_cs_request *request)
+{
+       radv_amdgpu_request_to_fence(ctx,
+                                    &ctx->last_submission[request->ip_type][request->ring],
+                                    request);
+}
+
 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
                                                struct radeon_winsys_cs **cs_array,
                                                unsigned cs_count,
@@ -560,7 +568,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
        if (fence)
                radv_amdgpu_request_to_fence(ctx, fence, &request);
 
-       ctx->last_seq_no = request.seq_no;
+       radv_assign_last_submit(ctx, &request);
 
        return r;
 }
@@ -625,7 +633,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
        if (fence)
                radv_amdgpu_request_to_fence(ctx, fence, &request);
 
-       ctx->last_seq_no = request.seq_no;
+       radv_assign_last_submit(ctx, &request);
 
        return 0;
 }
@@ -715,7 +723,9 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
        }
        if (fence)
                radv_amdgpu_request_to_fence(ctx, fence, &request);
-       ctx->last_seq_no = request.seq_no;
+
+       radv_assign_last_submit(ctx, &request);
+
        return 0;
 }
 
@@ -765,22 +775,16 @@ static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
        FREE(ctx);
 }
 
-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
+static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
+                                      enum ring_type ring_type, int ring_index)
 {
        struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+       int ip_type = ring_to_hw_ip(ring_type);
 
-       if (ctx->last_seq_no) {
+       if (ctx->last_submission[ip_type][ring_index].fence) {
                uint32_t expired;
-               struct amdgpu_cs_fence fence;
-
-               fence.context = ctx->ctx;
-               fence.ip_type = AMDGPU_HW_IP_GFX;
-               fence.ip_instance = 0;
-               fence.ring = 0;
-               fence.fence = ctx->last_seq_no;
-
-               int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
-                                                      &expired);
+               int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
+                                                      1000000000ull, 0, &expired);
 
                if (ret || !expired)
                        return false;
index affee9528e856f7f905c5dfb68572c1920d28f21..fc6a2c8efd70d340c73596f3976453fe3f7c8025 100644 (file)
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_winsys.h"
 
+enum {
+       MAX_RINGS_PER_TYPE = 8
+};
+
 struct radv_amdgpu_ctx {
        struct radv_amdgpu_winsys *ws;
        amdgpu_context_handle ctx;
-       uint64_t last_seq_no;
+       struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
 };
 
 static inline struct radv_amdgpu_ctx *
index 1ae78ac8d17f05fda249c15b32e7eaafa7860ef5..35b6bc57a42bec20d4fb2799a08d51048b81661f 100644 (file)
@@ -301,8 +301,10 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
        ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo);
        ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
        ws->info.has_virtual_memory = TRUE;
-       ws->info.sdma_rings = util_bitcount(dma.available_rings);
-       ws->info.compute_rings = util_bitcount(compute.available_rings);
+       ws->info.sdma_rings = MIN2(util_bitcount(dma.available_rings),
+                                  MAX_RINGS_PER_TYPE);
+       ws->info.compute_rings = MIN2(util_bitcount(compute.available_rings),
+                                     MAX_RINGS_PER_TYPE);
 
        /* Get the number of good compute units. */
        ws->info.num_good_compute_units = 0;