Revert "radv: Don't store buffer references in the descriptor set."
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 19 Apr 2018 11:39:17 +0000 (13:39 +0200)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 20 Apr 2018 14:18:13 +0000 (16:18 +0200)
In order to reduce a performance regression introduced by
4b13fe55a4 ("radv: Keep a global BO list for VkMemory."),
we are going to maintain two different paths.

One when VK_EXT_descriptor_indexing is enabled by the
application because we need to have a global BO list, and
one (the old one) when it's not enabled.

With Talos on Polaris, the global BO list reduces performance
by 10% which is too much for me.

This reverts commit ab6cadd3ecc7fbdd9079808b407674e0b19c52f0.
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_debug.c
src/amd/vulkan/radv_descriptor_set.c
src/amd/vulkan/radv_descriptor_set.h
src/amd/vulkan/radv_private.h

index 1afdeda486515861e5025ab09008b6b0e079036f..b06429abd766298736ed476043cb5c3aac915487 100644 (file)
@@ -2206,6 +2206,10 @@ radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
 
        assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
 
+       for (unsigned j = 0; j < set->layout->buffer_count; ++j)
+               if (set->descriptors[j])
+                       radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j], 7);
+
        if(set->bo)
                radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo, 8);
 }
index 2e9e016523700a90f1fbb4b1c94d18f510d7e44c..368bc4b5d0561aca91015d5dad52c8979ed6f98a 100644 (file)
@@ -250,6 +250,7 @@ radv_dump_descriptor_set(enum chip_class chip_class,
        fprintf(f, "\tshader_stages: %x\n", layout->shader_stages);
        fprintf(f, "\tdynamic_shader_stages: %x\n",
                layout->dynamic_shader_stages);
+       fprintf(f, "\tbuffer_count: %d\n", layout->buffer_count);
        fprintf(f, "\tdynamic_offset_count: %d\n",
                layout->dynamic_offset_count);
        fprintf(f, "\n");
@@ -265,6 +266,8 @@ radv_dump_descriptor_set(enum chip_class chip_class,
                        layout->binding[i].array_size);
                fprintf(f, "\t\toffset: %d\n",
                        layout->binding[i].offset);
+               fprintf(f, "\t\tbuffer_offset: %d\n",
+                       layout->binding[i].buffer_offset);
                fprintf(f, "\t\tdynamic_offset_offset: %d\n",
                        layout->binding[i].dynamic_offset_offset);
                fprintf(f, "\t\tdynamic_offset_count: %d\n",
index 55b4aaa388cc83e2c286a51a094bfc5bf45afb1c..4b08a1f0f807aee4de74250c1686f50804285f5e 100644 (file)
@@ -117,12 +117,14 @@ VkResult radv_CreateDescriptorSetLayout(
 
        memset(set_layout->binding, 0, size - sizeof(struct radv_descriptor_set_layout));
 
+       uint32_t buffer_count = 0;
        uint32_t dynamic_offset_count = 0;
 
        for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
                const VkDescriptorSetLayoutBinding *binding = bindings + j;
                uint32_t b = binding->binding;
                uint32_t alignment;
+               unsigned binding_buffer_count = 0;
 
                switch (binding->descriptorType) {
                case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -131,6 +133,7 @@ VkResult radv_CreateDescriptorSetLayout(
                        set_layout->binding[b].dynamic_offset_count = 1;
                        set_layout->dynamic_shader_stages |= binding->stageFlags;
                        set_layout->binding[b].size = 0;
+                       binding_buffer_count = 1;
                        alignment = 1;
                        break;
                case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@@ -138,6 +141,7 @@ VkResult radv_CreateDescriptorSetLayout(
                case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
                        set_layout->binding[b].size = 16;
+                       binding_buffer_count = 1;
                        alignment = 16;
                        break;
                case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@@ -145,11 +149,13 @@ VkResult radv_CreateDescriptorSetLayout(
                case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
                        /* main descriptor + fmask descriptor */
                        set_layout->binding[b].size = 64;
+                       binding_buffer_count = 1;
                        alignment = 32;
                        break;
                case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
                        /* main descriptor + fmask descriptor + sampler */
                        set_layout->binding[b].size = 96;
+                       binding_buffer_count = 1;
                        alignment = 32;
                        break;
                case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -165,6 +171,7 @@ VkResult radv_CreateDescriptorSetLayout(
                set_layout->binding[b].type = binding->descriptorType;
                set_layout->binding[b].array_size = binding->descriptorCount;
                set_layout->binding[b].offset = set_layout->size;
+               set_layout->binding[b].buffer_offset = buffer_count;
                set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
 
                if (variable_flags && binding->binding < variable_flags->bindingCount &&
@@ -197,6 +204,7 @@ VkResult radv_CreateDescriptorSetLayout(
                }
 
                set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
+               buffer_count += binding->descriptorCount * binding_buffer_count;
                dynamic_offset_count += binding->descriptorCount *
                        set_layout->binding[b].dynamic_offset_count;
                set_layout->shader_stages |= binding->stageFlags;
@@ -204,6 +212,7 @@ VkResult radv_CreateDescriptorSetLayout(
 
        free(bindings);
 
+       set_layout->buffer_count = buffer_count;
        set_layout->dynamic_offset_count = dynamic_offset_count;
 
        *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
@@ -396,7 +405,8 @@ radv_descriptor_set_create(struct radv_device *device,
                           struct radv_descriptor_set **out_set)
 {
        struct radv_descriptor_set *set;
-       unsigned range_offset = sizeof(struct radv_descriptor_set);
+       unsigned range_offset = sizeof(struct radv_descriptor_set) +
+               sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
        unsigned mem_size = range_offset +
                sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;
 
@@ -700,16 +710,25 @@ VkResult radv_FreeDescriptorSets(
 }
 
 static void write_texel_buffer_descriptor(struct radv_device *device,
+                                         struct radv_cmd_buffer *cmd_buffer,
                                          unsigned *dst,
+                                         struct radeon_winsys_bo **buffer_list,
                                          const VkBufferView _buffer_view)
 {
        RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
 
        memcpy(dst, buffer_view->state, 4 * 4);
+
+       if (cmd_buffer)
+               radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo, 7);
+       else
+               *buffer_list = buffer_view->bo;
 }
 
 static void write_buffer_descriptor(struct radv_device *device,
+                                    struct radv_cmd_buffer *cmd_buffer,
                                     unsigned *dst,
+                                    struct radeon_winsys_bo **buffer_list,
                                     const VkDescriptorBufferInfo *buffer_info)
 {
        RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
@@ -730,10 +749,15 @@ static void write_buffer_descriptor(struct radv_device *device,
                S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
+       if (cmd_buffer)
+               radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo, 7);
+       else
+               *buffer_list = buffer->bo;
 }
 
 static void write_dynamic_buffer_descriptor(struct radv_device *device,
                                             struct radv_descriptor_range *range,
+                                            struct radeon_winsys_bo **buffer_list,
                                             const VkDescriptorBufferInfo *buffer_info)
 {
        RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
@@ -746,11 +770,15 @@ static void write_dynamic_buffer_descriptor(struct radv_device *device,
        va += buffer_info->offset + buffer->offset;
        range->va = va;
        range->size = size;
+
+       *buffer_list = buffer->bo;
 }
 
 static void
 write_image_descriptor(struct radv_device *device,
+                      struct radv_cmd_buffer *cmd_buffer,
                       unsigned *dst,
+                      struct radeon_winsys_bo **buffer_list,
                       VkDescriptorType descriptor_type,
                       const VkDescriptorImageInfo *image_info)
 {
@@ -764,18 +792,25 @@ write_image_descriptor(struct radv_device *device,
        }
 
        memcpy(dst, descriptor, 16 * 4);
+
+       if (cmd_buffer)
+               radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo, 7);
+       else
+               *buffer_list = iview->bo;
 }
 
 static void
 write_combined_image_sampler_descriptor(struct radv_device *device,
+                                       struct radv_cmd_buffer *cmd_buffer,
                                        unsigned *dst,
+                                       struct radeon_winsys_bo **buffer_list,
                                        VkDescriptorType descriptor_type,
                                        const VkDescriptorImageInfo *image_info,
                                        bool has_sampler)
 {
        RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
 
-       write_image_descriptor(device, dst, descriptor_type, image_info);
+       write_image_descriptor(device, cmd_buffer, dst, buffer_list, descriptor_type, image_info);
        /* copy over sampler state */
        if (has_sampler)
                memcpy(dst + 16, sampler->state, 16);
@@ -808,7 +843,7 @@ void radv_update_descriptor_sets(
                const struct radv_descriptor_set_binding_layout *binding_layout =
                        set->layout->binding + writeset->dstBinding;
                uint32_t *ptr = set->mapped_ptr;
-
+               struct radeon_winsys_bo **buffer_list =  set->descriptors;
                /* Immutable samplers are not copied into push descriptors when they are
                 * allocated, so if we are writing push descriptors we have to copy the
                 * immutable samplers into them now.
@@ -819,6 +854,8 @@ void radv_update_descriptor_sets(
 
                ptr += binding_layout->offset / 4;
                ptr += binding_layout->size * writeset->dstArrayElement / 4;
+               buffer_list += binding_layout->buffer_offset;
+               buffer_list += writeset->dstArrayElement;
                for (j = 0; j < writeset->descriptorCount; ++j) {
                        switch(writeset->descriptorType) {
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -827,25 +864,28 @@ void radv_update_descriptor_sets(
                                idx += binding_layout->dynamic_offset_offset;
                                assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
                                write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
-                                                               writeset->pBufferInfo + j);
+                                                               buffer_list, writeset->pBufferInfo + j);
                                break;
                        }
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-                               write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j);
+                               write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+                                                       writeset->pBufferInfo + j);
                                break;
                        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-                               write_texel_buffer_descriptor(device, ptr, writeset->pTexelBufferView[j]);
+                               write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+                                                             writeset->pTexelBufferView[j]);
                                break;
                        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
                        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
                        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-                               write_image_descriptor(device, ptr, writeset->descriptorType,
+                               write_image_descriptor(device, cmd_buffer, ptr, buffer_list,
+                                                      writeset->descriptorType,
                                                       writeset->pImageInfo + j);
                                break;
                        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-                               write_combined_image_sampler_descriptor(device, ptr,
+                               write_combined_image_sampler_descriptor(device, cmd_buffer, ptr, buffer_list,
                                                                        writeset->descriptorType,
                                                                        writeset->pImageInfo + j,
                                                                        !binding_layout->immutable_samplers_offset);
@@ -868,6 +908,7 @@ void radv_update_descriptor_sets(
                                break;
                        }
                        ptr += binding_layout->size / 4;
+                       ++buffer_list;
                }
 
        }
@@ -884,6 +925,8 @@ void radv_update_descriptor_sets(
                        dst_set->layout->binding + copyset->dstBinding;
                uint32_t *src_ptr = src_set->mapped_ptr;
                uint32_t *dst_ptr = dst_set->mapped_ptr;
+               struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+               struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
 
                src_ptr += src_binding_layout->offset / 4;
                dst_ptr += dst_binding_layout->offset / 4;
@@ -891,6 +934,12 @@ void radv_update_descriptor_sets(
                src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
                dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
 
+               src_buffer_list += src_binding_layout->buffer_offset;
+               src_buffer_list += copyset->srcArrayElement;
+
+               dst_buffer_list += dst_binding_layout->buffer_offset;
+               dst_buffer_list += copyset->dstArrayElement;
+
                for (j = 0; j < copyset->descriptorCount; ++j) {
                        switch (src_binding_layout->type) {
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -911,6 +960,9 @@ void radv_update_descriptor_sets(
                        }
                        src_ptr += src_binding_layout->size / 4;
                        dst_ptr += dst_binding_layout->size / 4;
+                       dst_buffer_list[j] = src_buffer_list[j];
+                       ++src_buffer_list;
+                       ++dst_buffer_list;
                }
        }
 }
@@ -952,6 +1004,7 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
                const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
                const struct radv_descriptor_set_binding_layout *binding_layout =
                        set_layout->binding + entry->dstBinding;
+               const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
                const uint32_t *immutable_samplers = NULL;
                uint32_t dst_offset;
                uint32_t dst_stride;
@@ -990,6 +1043,7 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
                        .src_stride = entry->stride,
                        .dst_offset = dst_offset,
                        .dst_stride = dst_stride,
+                       .buffer_offset = buffer_offset,
                        .has_sampler = !binding_layout->immutable_samplers_offset,
                        .immutable_samplers = immutable_samplers
                };
@@ -1022,6 +1076,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
        uint32_t i;
 
        for (i = 0; i < templ->entry_count; ++i) {
+               struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
                uint32_t *pDst = set->mapped_ptr + templ->entry[i].dst_offset;
                const uint8_t *pSrc = ((const uint8_t *) pData) + templ->entry[i].src_offset;
                uint32_t j;
@@ -1033,28 +1088,28 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
                                const unsigned idx = templ->entry[i].dst_offset + j;
                                assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
                                write_dynamic_buffer_descriptor(device, set->dynamic_descriptors + idx,
-                                                               (struct VkDescriptorBufferInfo *) pSrc);
+                                                               buffer_list, (struct VkDescriptorBufferInfo *) pSrc);
                                break;
                        }
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-                               write_buffer_descriptor(device, pDst,
+                               write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
                                                        (struct VkDescriptorBufferInfo *) pSrc);
                                break;
                        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-                               write_texel_buffer_descriptor(device, pDst,
+                               write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
                                                              *(VkBufferView *) pSrc);
                                break;
                        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
                        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
                        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-                               write_image_descriptor(device, pDst,
+                               write_image_descriptor(device, cmd_buffer, pDst, buffer_list,
                                                       templ->entry[i].descriptor_type,
                                                       (struct VkDescriptorImageInfo *) pSrc);
                                break;
                        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-                               write_combined_image_sampler_descriptor(device, pDst,
+                               write_combined_image_sampler_descriptor(device, cmd_buffer, pDst, buffer_list,
                                                                        templ->entry[i].descriptor_type,
                                                                        (struct VkDescriptorImageInfo *) pSrc,
                                                                        templ->entry[i].has_sampler);
@@ -1074,6 +1129,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
                        }
                        pSrc += templ->entry[i].src_stride;
                        pDst += templ->entry[i].dst_stride;
+                       ++buffer_list;
                }
        }
 }
index d1cba953f7e4657b3f592de96d01e8987e105869..d8431241fd917c11420864e72f30f77a152b1759 100644 (file)
@@ -35,6 +35,7 @@ struct radv_descriptor_set_binding_layout {
    uint32_t array_size;
 
    uint32_t offset;
+   uint32_t buffer_offset;
    uint16_t dynamic_offset_offset;
 
    uint16_t dynamic_offset_count;
@@ -61,6 +62,9 @@ struct radv_descriptor_set_layout {
    uint16_t shader_stages;
    uint16_t dynamic_shader_stages;
 
+   /* Number of buffers in this descriptor set */
+   uint32_t buffer_count;
+
    /* Number of dynamic offsets used by this descriptor set */
    uint16_t dynamic_offset_count;
 
index 4a860c595fb903ff36566a4fe3d8f1359df1b0a8..dfe4c5f942245be676c658100e38bda4d5b423e5 100644 (file)
@@ -698,6 +698,8 @@ struct radv_descriptor_set {
        uint64_t va;
        uint32_t *mapped_ptr;
        struct radv_descriptor_range *dynamic_descriptors;
+
+       struct radeon_winsys_bo *descriptors[0];
 };
 
 struct radv_push_descriptor_set