radv: Use local buffers for the global bo list.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 9 Apr 2019 23:42:31 +0000 (01:42 +0200)
committer: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 15 Apr 2019 18:39:38 +0000 (20:39 +0200)
Do this even if we don't use local buffers in general. It turns out that,
even though the performance is not the best, the kernel still does it
better than our own list.

We still have to keep the radv bo list for buffers that are shared
externally.

This improves Talos on the lowest quality setting (i.e. as CPU-bound as
possible) by ~10% if the global bo list is enabled.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c

index 6fda43c03722f85f4548e27f05cb46919eef5316..62ee32ecf2c5a53542a28eab3c93f4a19b1d1752 100644 (file)
@@ -3189,8 +3189,12 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
                        flags |= RADEON_FLAG_GTT_WC;
 
-               if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
+               if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
                        flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+                       if (device->use_global_bo_list) {
+                               flags |= RADEON_FLAG_PREFER_LOCAL_BO;
+                       }
+               }
 
                mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
                                                    domain, flags, priority);
index 73410efc252eb25649b1ac20ebec1181f04aee21..ac3a8af600092210b4e65ef5621668f69fa28693 100644 (file)
@@ -58,6 +58,7 @@ enum radeon_bo_flag { /* bitfield */
        RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
        RADEON_FLAG_READ_ONLY =     (1 << 7),
        RADEON_FLAG_32BIT =         (1 << 8),
+       RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
 };
 
 enum radeon_bo_usage { /* bitfield */
index 79958cdd7cb803bf24b0b39a9e8a84c84b7f02f3..568716aa90dedaced5dad695de008e6ac898e00b 100644 (file)
@@ -368,7 +368,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
        if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
                request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
        if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
-           ws->info.has_local_buffers && ws->use_local_bos) {
+           ws->info.has_local_buffers &&
+           (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
                bo->base.is_local = true;
                request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
        }