radv: Add userspace fence buffer per context.
author     Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
           Wed, 3 May 2017 00:21:39 +0000 (02:21 +0200)
committer  Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
           Wed, 3 May 2017 01:10:12 +0000 (03:10 +0200)
Having it in the winsys didn't work when multiple devices share
the same winsys, since we then have multiple contexts per queue
and each context numbers its fences independently.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Fixes: 7b9963a28f4 ("radv: Enable userspace fence checking.")
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h

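For context before the hunks: the patch moves the userspace fence buffer from the winsys into each radv_amdgpu_ctx, so every context gets its own 4 KiB GTT buffer with one 64-bit slot per (ip_type, ring). The sketch below is illustrative only and not part of the patch; the struct and helper names carrying a sketch_ prefix are invented here, while the actual logic lives in radv_amdgpu_request_to_fence(), radv_set_cs_fence() and radv_amdgpu_fence_wait() in the first file.

/* Illustrative sketch only -- not part of the patch.  It shows the idea of
 * the userspace fast path: the kernel writes the last completed sequence
 * number into a per-context buffer, one 64-bit slot per (ip_type, ring), so
 * a fence can often be proven signaled with a plain memory read instead of
 * an amdgpu_cs_query_fence_status() ioctl. */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_MAX_RINGS_PER_TYPE 8   /* matches MAX_RINGS_PER_TYPE below */

struct sketch_fence {
	uint64_t seq_no;              /* stands in for fence->fence.fence  */
	volatile uint64_t *user_ptr;  /* slot inside the context fence_map */
};

/* Byte offset of a ring's slot inside the per-context fence BO; this is the
 * value radv_set_cs_fence() hands to the kernel as fence_info.offset. */
static inline uint64_t sketch_fence_offset(int ip_type, int ring)
{
	return (ip_type * SKETCH_MAX_RINGS_PER_TYPE + ring) * sizeof(uint64_t);
}

/* Fast-path check: true means the fence is known to have signaled; false
 * means "fall back to the libdrm query", which is the shape of
 * radv_amdgpu_fence_wait() after this patch. */
static inline bool sketch_fence_signaled(const struct sketch_fence *fence)
{
	return fence->user_ptr && *fence->user_ptr >= fence->seq_no;
}
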
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 7e46430856492dde2242261eb13aadb505e684cf..cbfd0d1a8e2191921a44c4e772a6dba14a345813 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -90,25 +90,26 @@ static int ring_to_hw_ip(enum ring_type ring)
 }
 
 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
-                                        struct amdgpu_cs_fence *fence,
+                                        struct radv_amdgpu_fence *fence,
                                         struct amdgpu_cs_request *req)
 {
-       fence->context = ctx->ctx;
-       fence->ip_type = req->ip_type;
-       fence->ip_instance = req->ip_instance;
-       fence->ring = req->ring;
-       fence->fence = req->seq_no;
+       fence->fence.context = ctx->ctx;
+       fence->fence.ip_type = req->ip_type;
+       fence->fence.ip_instance = req->ip_instance;
+       fence->fence.ring = req->ring;
+       fence->fence.fence = req->seq_no;
+       fence->user_ptr = (volatile uint64_t*)(ctx->fence_map + (req->ip_type * MAX_RINGS_PER_TYPE + req->ring) * sizeof(uint64_t));
 }
 
 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
 {
-       struct radv_amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
+       struct radv_amdgpu_fence *fence = calloc(1, sizeof(struct radv_amdgpu_fence));
        return (struct radeon_winsys_fence*)fence;
 }
 
 static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
 {
-       struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+       struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
        free(fence);
 }
 
@@ -117,21 +118,20 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
                              bool absolute,
                              uint64_t timeout)
 {
-       struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)_ws;
-       struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+       struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
        unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
        int r;
        uint32_t expired = 0;
 
-       if (ws->fence_map) {
-               if (ws->fence_map[fence->ip_type * MAX_RINGS_PER_TYPE + fence->ring] >= fence->fence)
+       if (fence->user_ptr) {
+               if (*fence->user_ptr >= fence->fence.fence)
                        return true;
                if (!absolute && !timeout)
                        return false;
        }
 
        /* Now use the libdrm query. */
-       r = amdgpu_cs_query_fence_status(fence,
+       r = amdgpu_cs_query_fence_status(&fence->fence,
                                         timeout,
                                         flags,
                                         &expired);
@@ -627,11 +627,11 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
        return r;
 }
 
-static struct amdgpu_cs_fence_info radv_set_cs_fence(struct radv_amdgpu_winsys *ws, int ip_type, int ring)
+static struct amdgpu_cs_fence_info radv_set_cs_fence(struct radv_amdgpu_ctx *ctx, int ip_type, int ring)
 {
        struct amdgpu_cs_fence_info ret = {0};
-       if (ws->fence_map) {
-               ret.handle = radv_amdgpu_winsys_bo(ws->fence_bo)->bo;
+       if (ctx->fence_map) {
+               ret.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
                ret.offset = (ip_type * MAX_RINGS_PER_TYPE + ring) * sizeof(uint64_t);
        }
        return ret;
@@ -655,7 +655,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 {
        int r;
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-       struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+       struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
        struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_request request = {0};
@@ -694,7 +694,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
        request.number_of_ibs = 1;
        request.ibs = &cs0->ib;
        request.resources = bo_list;
-       request.fence_info = radv_set_cs_fence(cs0->ws, cs0->hw_ip, queue_idx);
+       request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
 
        if (initial_preamble_cs) {
                request.ibs = ibs;
@@ -732,7 +732,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 {
        int r;
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-       struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+       struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
        amdgpu_bo_list_handle bo_list;
        struct amdgpu_cs_request request;
 
@@ -759,7 +759,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
                request.resources = bo_list;
                request.number_of_ibs = cnt + !!preamble_cs;
                request.ibs = ibs;
-               request.fence_info = radv_set_cs_fence(cs0->ws, cs0->hw_ip, queue_idx);
+               request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
 
                if (preamble_cs) {
                        ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
@@ -809,7 +809,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 {
        int r;
        struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-       struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+       struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
        struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
        struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
        amdgpu_bo_list_handle bo_list;
@@ -878,7 +878,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
                request.resources = bo_list;
                request.number_of_ibs = 1;
                request.ibs = &ib;
-               request.fence_info = radv_set_cs_fence(cs0->ws, cs0->hw_ip, queue_idx);
+               request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
 
                r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
                if (r) {
@@ -991,6 +991,15 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
                goto error_create;
        }
        ctx->ws = ws;
+
+       assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
+       ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
+                                             RADEON_DOMAIN_GTT,
+                                             RADEON_FLAG_CPU_ACCESS);
+       if (ctx->fence_bo)
+               ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
+       if (ctx->fence_map)
+               memset(ctx->fence_map, 0, 4096);
        return (struct radeon_winsys_ctx *)ctx;
 error_create:
        FREE(ctx);
@@ -1000,6 +1009,7 @@ error_create:
 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
 {
        struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+       ctx->ws->base.buffer_destroy(ctx->fence_bo);
        amdgpu_cs_ctx_free(ctx->ctx);
        FREE(ctx);
 }
@@ -1010,9 +1020,9 @@ static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
        struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
        int ip_type = ring_to_hw_ip(ring_type);
 
-       if (ctx->last_submission[ip_type][ring_index].fence) {
+       if (ctx->last_submission[ip_type][ring_index].fence.fence) {
                uint32_t expired;
-               int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
+               int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
                                                       1000000000ull, 0, &expired);
 
                if (ret || !expired)
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
index fc6a2c8efd70d340c73596f3976453fe3f7c8025..42d89eee54d0bc311b4cfb7eb9e12e2b75640844 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -42,10 +42,19 @@ enum {
        MAX_RINGS_PER_TYPE = 8
 };
 
+
+struct radv_amdgpu_fence {
+       struct amdgpu_cs_fence fence;
+       volatile uint64_t *user_ptr;
+};
+
 struct radv_amdgpu_ctx {
        struct radv_amdgpu_winsys *ws;
        amdgpu_context_handle ctx;
-       struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+       struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+
+       struct radeon_winsys_bo *fence_bo;
+       uint64_t *fence_map;
 };
 
 static inline struct radv_amdgpu_ctx *
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index 837493c06729f056ef7b9321550d74b1cbf9eb2e..783567fa5b4fd0cfb73e21b7c04121113facb9e1 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -342,7 +342,6 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
 {
        struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
 
-       ws->base.buffer_destroy(ws->fence_bo);
        AddrDestroy(ws->addrlib);
        amdgpu_device_deinitialize(ws->dev);
        FREE(rws);
@@ -381,14 +380,6 @@ radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
        radv_amdgpu_cs_init_functions(ws);
        radv_amdgpu_surface_init_functions(ws);
 
-       assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
-       ws->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
-                                             RADEON_DOMAIN_GTT,
-                                             RADEON_FLAG_CPU_ACCESS);
-       if (ws->fence_bo)
-               ws->fence_map = (uint64_t*)ws->base.buffer_map(ws->fence_bo);
-       if (ws->fence_map)
-               memset(ws->fence_map, 0, 4096);
        return &ws->base;
 
 winsys_fail:
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index c136da0c1ecc76fd2b0cbea70ef7709ffed0d934..abb238b358d6cea7f3e7f1aa71ab0a78e0bf0627 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -50,9 +50,6 @@ struct radv_amdgpu_winsys {
        unsigned num_buffers;
 
        bool use_ib_bos;
-
-       struct radeon_winsys_bo *fence_bo;
-       uint64_t *fence_map;
 };
 
 static inline struct radv_amdgpu_winsys *