radv/winsys: Set winsys bo priority on creation.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sun, 27 Jan 2019 23:28:05 +0000 (00:28 +0100)
committer: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 29 Jan 2019 14:56:41 +0000 (15:56 +0100)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
12 files changed:
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_debug.c
src/amd/vulkan/radv_descriptor_set.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_query.c
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/radv_shader.c
src/amd/vulkan/si_cmd_buffer.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c

index aae9029084143a2e1e58debfb060ce1d54ce2841..7f7f052986e8b39e24196c6a32923abd6b3228e9 100644 (file)
@@ -374,7 +374,8 @@ radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
                                       RADEON_DOMAIN_GTT,
                                       RADEON_FLAG_CPU_ACCESS|
                                       RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                      RADEON_FLAG_32BIT);
+                                      RADEON_FLAG_32BIT,
+                                      RADV_BO_PRIORITY_UPLOAD_BUFFER);
 
        if (!bo) {
                cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
index 08fc80c12ab81cc3d167d81b84b5d9735efd0bc1..6234904aaa4defaa7722315f2ec037f6c10709b6 100644 (file)
@@ -63,7 +63,8 @@ radv_init_trace(struct radv_device *device)
        device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
                                             RADEON_DOMAIN_VRAM,
                                             RADEON_FLAG_CPU_ACCESS|
-                                            RADEON_FLAG_NO_INTERPROCESS_SHARING);
+                                            RADEON_FLAG_NO_INTERPROCESS_SHARING,
+                                            RADV_BO_PRIORITY_UPLOAD_BUFFER);
        if (!device->trace_bo)
                return false;
 
index 30f982252bbea08af1223e5c96b74f05e8c53270..cebe06aa07898d9e788e566bdab440bd36d1b89d 100644 (file)
@@ -598,7 +598,8 @@ VkResult radv_CreateDescriptorPool(
                                                     RADEON_DOMAIN_VRAM,
                                                     RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                                     RADEON_FLAG_READ_ONLY |
-                                                    RADEON_FLAG_32BIT);
+                                                    RADEON_FLAG_32BIT,
+                                                    RADV_BO_PRIORITY_DESCRIPTOR);
                pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
        }
        pool->size = bo_size;
index 8ba04cd2f58109ce8548ff3a103da6c090922810..fdf051bcce9ed80c5c2bb22bd821332212d0c4df 100644 (file)
@@ -2373,7 +2373,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                              scratch_size,
                                                              4096,
                                                              RADEON_DOMAIN_VRAM,
-                                                             ring_bo_flags);
+                                                             ring_bo_flags,
+                                                             RADV_BO_PRIORITY_SCRATCH);
                if (!scratch_bo)
                        goto fail;
        } else
@@ -2384,7 +2385,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                      compute_scratch_size,
                                                                      4096,
                                                                      RADEON_DOMAIN_VRAM,
-                                                                     ring_bo_flags);
+                                                                     ring_bo_flags,
+                                                                     RADV_BO_PRIORITY_SCRATCH);
                if (!compute_scratch_bo)
                        goto fail;
 
@@ -2396,7 +2398,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                esgs_ring_size,
                                                                4096,
                                                                RADEON_DOMAIN_VRAM,
-                                                               ring_bo_flags);
+                                                               ring_bo_flags,
+                                                               RADV_BO_PRIORITY_SCRATCH);
                if (!esgs_ring_bo)
                        goto fail;
        } else {
@@ -2409,7 +2412,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                gsvs_ring_size,
                                                                4096,
                                                                RADEON_DOMAIN_VRAM,
-                                                               ring_bo_flags);
+                                                               ring_bo_flags,
+                                                               RADV_BO_PRIORITY_SCRATCH);
                if (!gsvs_ring_bo)
                        goto fail;
        } else {
@@ -2422,7 +2426,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                 tess_offchip_ring_offset + tess_offchip_ring_size,
                                                                 256,
                                                                 RADEON_DOMAIN_VRAM,
-                                                                ring_bo_flags);
+                                                                ring_bo_flags,
+                                                                RADV_BO_PRIORITY_SCRATCH);
                if (!tess_rings_bo)
                        goto fail;
        } else {
@@ -2450,7 +2455,8 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                 RADEON_DOMAIN_VRAM,
                                                                 RADEON_FLAG_CPU_ACCESS |
                                                                 RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                                                RADEON_FLAG_READ_ONLY);
+                                                                RADEON_FLAG_READ_ONLY,
+                                                                RADV_BO_PRIORITY_DESCRIPTOR);
                if (!descriptor_bo)
                        goto fail;
        } else
@@ -3088,7 +3094,7 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                       import_info->handleType ==
                       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
                mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
-                                                    NULL, NULL);
+                                                    RADV_BO_PRIORITY_DEFAULT, NULL, NULL);
                if (!mem->bo) {
                        result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
                        goto fail;
@@ -3099,7 +3105,8 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
                assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
                mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
-                                                     pAllocateInfo->allocationSize);
+                                                     pAllocateInfo->allocationSize,
+                                                     RADV_BO_PRIORITY_DEFAULT);
                if (!mem->bo) {
                        result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
                        goto fail;
@@ -3126,7 +3133,7 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                        flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
                mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
-                                                   domain, flags);
+                                                   domain, flags, RADV_BO_PRIORITY_DEFAULT);
 
                if (!mem->bo) {
                        result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -3886,7 +3893,8 @@ VkResult radv_CreateEvent(
 
        event->bo = device->ws->buffer_create(device->ws, 8, 8,
                                              RADEON_DOMAIN_GTT,
-                                             RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
+                                             RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+                                             RADV_BO_PRIORITY_FENCE);
        if (!event->bo) {
                vk_free2(&device->alloc, pAllocator, event);
                return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
@@ -3972,7 +3980,8 @@ VkResult radv_CreateBuffer(
        if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
                buffer->bo = device->ws->buffer_create(device->ws,
                                                       align64(buffer->size, 4096),
-                                                      4096, 0, RADEON_FLAG_VIRTUAL);
+                                                      4096, 0, RADEON_FLAG_VIRTUAL,
+                                                      RADV_BO_PRIORITY_VIRTUAL);
                if (!buffer->bo) {
                        vk_free2(&device->alloc, pAllocator, buffer);
                        return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
index 6b51efaf91911f54b36380310a52d2e590a54158..fc8c6a2ebfe0baec7aed717ec5a961bf413c5b49 100644 (file)
@@ -1046,7 +1046,7 @@ radv_image_create(VkDevice _device,
                image->offset = 0;
 
                image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
-                                                     0, RADEON_FLAG_VIRTUAL);
+                                                     0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
                if (!image->bo) {
                        vk_free2(&device->alloc, alloc, image);
                        return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
index 84166ccf250a37c521a128934c1ab83223f7ea72..894b8cd510dbba443ce419e08c95171b7c7e4a17 100644 (file)
@@ -1061,7 +1061,8 @@ VkResult radv_CreateQueryPool(
                pool->size += 4 * pCreateInfo->queryCount;
 
        pool->bo = device->ws->buffer_create(device->ws, pool->size,
-                                            64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING);
+                                            64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,
+                                            RADV_BO_PRIORITY_QUERY_POOL);
 
        if (!pool->bo) {
                vk_free2(&device->alloc, pAllocator, pool);
index d9b46d89cf3402b6d79c4fdc63c8184a07a2f0db..2684aadc81fcf3aa415b58038ce7a9af69a6612b 100644 (file)
@@ -188,6 +188,29 @@ struct radv_winsys_bo_list {
        unsigned count;
 };
 
+/* Buffer-object priorities. The kernel effectively allows 0-31; the values
+ * below assign priorities to fixed-functionality buffers. */
+enum {
+       RADV_BO_PRIORITY_DEFAULT = 14,
+
+       RADV_BO_PRIORITY_APPLICATION_MAX = 28,
+
+       /* Virtual buffers get priority 0 because their priority is not used. */
+       RADV_BO_PRIORITY_VIRTUAL = 0,
+
+       /* This should be considerably lower than most of the entries below,
+        * but how much lower is hard to say since application assignments
+        * are unknown. It is kept fairly high because it is GTT anyway. */
+       RADV_BO_PRIORITY_QUERY_POOL = 29,
+
+       RADV_BO_PRIORITY_DESCRIPTOR = 30,
+       RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
+       RADV_BO_PRIORITY_FENCE = 30,
+       RADV_BO_PRIORITY_SHADER = 31,
+       RADV_BO_PRIORITY_SCRATCH = 31,
+       RADV_BO_PRIORITY_CS = 31,
+};
+
 struct radeon_winsys {
        void (*destroy)(struct radeon_winsys *ws);
 
@@ -206,17 +229,20 @@ struct radeon_winsys {
                                                  uint64_t size,
                                                  unsigned alignment,
                                                  enum radeon_bo_domain domain,
-                                                 enum radeon_bo_flag flags);
+                                                 enum radeon_bo_flag flags,
+                                                 unsigned priority);
 
        void (*buffer_destroy)(struct radeon_winsys_bo *bo);
        void *(*buffer_map)(struct radeon_winsys_bo *bo);
 
        struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws,
                                                    void *pointer,
-                                                   uint64_t size);
+                                                   uint64_t size,
+                                                   unsigned priority);
 
        struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws,
                                                   int fd,
+                                                  unsigned priority,
                                                   unsigned *stride, unsigned *offset);
 
        bool (*buffer_get_fd)(struct radeon_winsys *ws,
index 42f5e4efe6c64bdd5f3c9293e138eb39b379663e..32cd9ae25e91c213a05606694c9958d9f051e6ad 100644 (file)
@@ -395,7 +395,8 @@ radv_alloc_shader_memory(struct radv_device *device,
                                             RADEON_DOMAIN_VRAM,
                                             RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                             (device->physical_device->cpdma_prefetch_writes_memory ?
-                                                    0 : RADEON_FLAG_READ_ONLY));
+                                                    0 : RADEON_FLAG_READ_ONLY),
+                                            RADV_BO_PRIORITY_SHADER);
        slab->ptr = (char*)device->ws->buffer_map(slab->bo);
        list_inithead(&slab->shaders);
 
index 1005ab3f7e78f42dbdcb3d6b6f56f6dd55c29d02..e75c6d127d66968ac4d23a3c7cf1fa88c4188b7e 100644 (file)
@@ -399,7 +399,8 @@ cik_create_gfx_config(struct radv_device *device)
                                                     RADEON_DOMAIN_GTT,
                                                     RADEON_FLAG_CPU_ACCESS|
                                                     RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                                    RADEON_FLAG_READ_ONLY);
+                                                    RADEON_FLAG_READ_ONLY,
+                                                    RADV_BO_PRIORITY_CS);
        if (!device->gfx_init)
                goto fail;
 
index 7194d5a3236809ac0ea4cfe60f2dc2f9a876fadf..83e6f37f58b08eb536e04e191b2f7a1dcff620ae 100644 (file)
@@ -302,7 +302,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                             uint64_t size,
                             unsigned alignment,
                             enum radeon_bo_domain initial_domain,
-                            unsigned flags)
+                            unsigned flags,
+                            unsigned priority)
 {
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        struct radv_amdgpu_winsys_bo *bo;
@@ -392,6 +393,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
        bo->bo = buf_handle;
        bo->initial_domain = initial_domain;
        bo->is_shared = false;
+       bo->priority = priority;
 
        if (initial_domain & RADEON_DOMAIN_VRAM)
                p_atomic_add(&ws->allocated_vram,
@@ -460,7 +462,8 @@ radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
 static struct radeon_winsys_bo *
 radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                                void *pointer,
-                               uint64_t size)
+                               uint64_t size,
+                              unsigned priority)
 {
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
        amdgpu_bo_handle buf_handle;
@@ -498,6 +501,7 @@ radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
        bo->ws = ws;
        bo->bo = buf_handle;
        bo->initial_domain = RADEON_DOMAIN_GTT;
+       bo->priority = priority;
 
        p_atomic_add(&ws->allocated_gtt,
                     align64(bo->size, ws->info.gart_page_size));
@@ -518,7 +522,8 @@ error:
 
 static struct radeon_winsys_bo *
 radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
-                             int fd, unsigned *stride,
+                             int fd, unsigned priority,
+                             unsigned *stride,
                              unsigned *offset)
 {
        struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
@@ -565,6 +570,7 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
        bo->size = result.alloc_size;
        bo->is_shared = true;
        bo->ws = ws;
+       bo->priority = priority;
        bo->ref_count = 1;
 
        if (bo->initial_domain & RADEON_DOMAIN_VRAM)
index f32e43083860f2e3baebe6a7d5169c5bc39bb17b..400f79deff762238ba8da4994583154286483891 100644 (file)
@@ -45,6 +45,7 @@ struct radv_amdgpu_winsys_bo {
        uint64_t size;
        struct radv_amdgpu_winsys *ws;
        bool is_virtual;
+       uint8_t priority;
        int ref_count;
 
        union {
index d4de9447530eed3470060e76a3b5b9ed9cff9329..366eb3481a2fbfac6c5b5522de5b8172325c44bf 100644 (file)
@@ -243,7 +243,8 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws,
                                                  RADEON_DOMAIN_GTT,
                                                  RADEON_FLAG_CPU_ACCESS |
                                                  RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                                 RADEON_FLAG_READ_ONLY);
+                                                 RADEON_FLAG_READ_ONLY,
+                                                 RADV_BO_PRIORITY_CS);
                if (!cs->ib_buffer) {
                        free(cs);
                        return NULL;
@@ -358,7 +359,8 @@ static void radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
                                                   RADEON_DOMAIN_GTT,
                                                   RADEON_FLAG_CPU_ACCESS |
                                                   RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                                  RADEON_FLAG_READ_ONLY);
+                                                  RADEON_FLAG_READ_ONLY,
+                                                  RADV_BO_PRIORITY_CS);
 
        if (!cs->ib_buffer) {
                cs->base.cdw = 0;
@@ -1016,7 +1018,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
                                                           RADEON_DOMAIN_GTT,
                                                           RADEON_FLAG_CPU_ACCESS |
                                                           RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                                          RADEON_FLAG_READ_ONLY);
+                                                          RADEON_FLAG_READ_ONLY,
+                                                          RADV_BO_PRIORITY_CS);
                                ptr = ws->buffer_map(bos[j]);
 
                                if (needs_preamble) {
@@ -1055,7 +1058,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
                                                   RADEON_DOMAIN_GTT,
                                                   RADEON_FLAG_CPU_ACCESS |
                                                   RADEON_FLAG_NO_INTERPROCESS_SHARING |
-                                                  RADEON_FLAG_READ_ONLY);
+                                                  RADEON_FLAG_READ_ONLY,
+                                                  RADV_BO_PRIORITY_CS);
                        ptr = ws->buffer_map(bos[0]);
 
                        if (preamble_cs) {
@@ -1249,8 +1253,9 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
        assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
        ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
                                              RADEON_DOMAIN_GTT,
-                                             RADEON_FLAG_CPU_ACCESS|
-                                              RADEON_FLAG_NO_INTERPROCESS_SHARING);
+                                             RADEON_FLAG_CPU_ACCESS |
+                                             RADEON_FLAG_NO_INTERPROCESS_SHARING,
+                                             RADV_BO_PRIORITY_CS);
        if (ctx->fence_bo)
                ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
        if (ctx->fence_map)