radv: use a global BO list only for VK_EXT_descriptor_indexing
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 19 Apr 2018 11:48:33 +0000 (13:48 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 20 Apr 2018 14:18:18 +0000 (16:18 +0200)
Maintaining two different paths is annoying, but enabling the
global BO list only when VK_EXT_descriptor_indexing is in use gets
rid of the performance regression introduced by the global BO list.

We might find a better solution in the future, but for now
just keep two paths.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_private.h

index b06429abd766298736ed476043cb5c3aac915487..baab8db6170a7a04a99e4de5594469fce7487cdc 100644 (file)
@@ -2206,9 +2206,11 @@ radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
 
        assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
 
-       for (unsigned j = 0; j < set->layout->buffer_count; ++j)
-               if (set->descriptors[j])
-                       radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j], 7);
+       if (!cmd_buffer->device->use_global_bo_list) {
+               for (unsigned j = 0; j < set->layout->buffer_count; ++j)
+                       if (set->descriptors[j])
+                               radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j], 7);
+       }
 
        if(set->bo)
                radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo, 8);
index 9950ed40f1bfa76af6e201cbdee1b4ff27743724..edf099e4f0fbea242c02aeff13f930706599fec0 100644 (file)
@@ -1302,8 +1302,14 @@ radv_bo_list_finish(struct radv_bo_list *bo_list)
        pthread_mutex_destroy(&bo_list->mutex);
 }
 
-static VkResult radv_bo_list_add(struct radv_bo_list *bo_list, struct radeon_winsys_bo *bo)
+static VkResult radv_bo_list_add(struct radv_device *device,
+                                struct radeon_winsys_bo *bo)
 {
+       struct radv_bo_list *bo_list = &device->bo_list;
+
+       if (unlikely(!device->use_global_bo_list))
+               return VK_SUCCESS;
+
        pthread_mutex_lock(&bo_list->mutex);
        if (bo_list->list.count == bo_list->capacity) {
                unsigned capacity = MAX2(4, bo_list->capacity * 2);
@@ -1323,8 +1329,14 @@ static VkResult radv_bo_list_add(struct radv_bo_list *bo_list, struct radeon_win
        return VK_SUCCESS;
 }
 
-static void radv_bo_list_remove(struct radv_bo_list *bo_list, struct radeon_winsys_bo *bo)
+static void radv_bo_list_remove(struct radv_device *device,
+                               struct radeon_winsys_bo *bo)
 {
+       struct radv_bo_list *bo_list = &device->bo_list;
+
+       if (unlikely(!device->use_global_bo_list))
+               return;
+
        pthread_mutex_lock(&bo_list->mutex);
        for(unsigned i = 0; i < bo_list->list.count; ++i) {
                if (bo_list->list.bos[i] == bo) {
@@ -1434,6 +1446,12 @@ VkResult radv_CreateDevice(
 
        keep_shader_info = device->enabled_extensions.AMD_shader_info;
 
+       /* With update after bind we can't attach bo's to the command buffer
+        * from the descriptor set anymore, so we have to use a global BO list.
+        */
+       device->use_global_bo_list =
+               device->enabled_extensions.EXT_descriptor_indexing;
+
        mtx_init(&device->shader_slab_mutex, mtx_plain);
        list_inithead(&device->shader_slabs);
 
@@ -2506,14 +2524,16 @@ VkResult radv_QueueSubmit(
                        sem_info.cs_emit_wait = j == 0;
                        sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
 
-                       pthread_mutex_lock(&queue->device->bo_list.mutex);
+                       if (unlikely(queue->device->use_global_bo_list))
+                               pthread_mutex_lock(&queue->device->bo_list.mutex);
 
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
                                                        advance, initial_preamble, continue_preamble_cs,
                                                        &sem_info, &queue->device->bo_list.list,
                                                        can_patch, base_fence);
 
-                       pthread_mutex_unlock(&queue->device->bo_list.mutex);
+                       if (unlikely(queue->device->use_global_bo_list))
+                               pthread_mutex_unlock(&queue->device->bo_list.mutex);
 
                        if (ret) {
                                radv_loge("failed to submit CS %d\n", i);
@@ -2761,7 +2781,7 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                mem->type_index = mem_type_index;
        }
 
-       result = radv_bo_list_add(&device->bo_list, mem->bo);
+       result = radv_bo_list_add(device, mem->bo);
        if (result != VK_SUCCESS)
                goto fail_bo;
 
@@ -2798,7 +2818,7 @@ void radv_FreeMemory(
        if (mem == NULL)
                return;
 
-       radv_bo_list_remove(&device->bo_list, mem->bo);
+       radv_bo_list_remove(device, mem->bo);
        device->ws->buffer_destroy(mem->bo);
        mem->bo = NULL;
 
index dfe4c5f942245be676c658100e38bda4d5b423e5..883342ede88ea890d5c6a198a2ef30f0c03bd600 100644 (file)
@@ -670,6 +670,9 @@ struct radv_device {
 
        struct radv_device_extension_table enabled_extensions;
 
+       /* Whether the driver uses a global BO list. */
+       bool use_global_bo_list;
+
        struct radv_bo_list bo_list;
 };