tu: Move vsc_data and vsc_data2 allocation into the device
author Connor Abbott <cwabbott0@gmail.com>
Tue, 28 Jan 2020 16:30:44 +0000 (17:30 +0100)
committer Connor Abbott <cwabbott0@gmail.com>
Wed, 5 Feb 2020 14:27:28 +0000 (15:27 +0100)
In addition to preparing us for dynamically resizing them, which has to
be controlled by the device, this greatly reduces the memory usage when
allocating large numbers of command buffers, making
dEQP-VK.api.object_management.max_concurrent.command_buffer_primary go
from crash -> pass.

Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3621>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3621>

src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_private.h

index 1a4c3d7fd7d10d901fdbb6118a1921f85a4aabca..99ad052a1bc1fbef49e25483aa46c9236a7a3e3d 100644 (file)
@@ -1670,27 +1670,14 @@ tu_create_cmd_buffer(struct tu_device *device,
    if (result != VK_SUCCESS)
       goto fail_scratch_bo;
 
-#define VSC_DATA_SIZE(pitch)  ((pitch) * 32 + 0x100)  /* extra size to store VSC_SIZE */
-#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
-
-   /* TODO: resize on overflow or compute a max size from # of vertices in renderpass?? */
-   cmd_buffer->vsc_data_pitch = 0x440 * 4;
-   cmd_buffer->vsc_data2_pitch = 0x1040 * 4;
-
-   result = tu_bo_init_new(device, &cmd_buffer->vsc_data, VSC_DATA_SIZE(cmd_buffer->vsc_data_pitch));
-   if (result != VK_SUCCESS)
-      goto fail_vsc_data;
-
-   result = tu_bo_init_new(device, &cmd_buffer->vsc_data2, VSC_DATA2_SIZE(cmd_buffer->vsc_data2_pitch));
-   if (result != VK_SUCCESS)
-      goto fail_vsc_data2;
+   /* TODO: resize on overflow */
+   cmd_buffer->vsc_data_pitch = device->vsc_data_pitch;
+   cmd_buffer->vsc_data2_pitch = device->vsc_data2_pitch;
+   cmd_buffer->vsc_data = device->vsc_data;
+   cmd_buffer->vsc_data2 = device->vsc_data2;
 
    return VK_SUCCESS;
 
-fail_vsc_data2:
-   tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
-fail_vsc_data:
-   tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
 fail_scratch_bo:
    list_del(&cmd_buffer->pool_link);
    return result;
@@ -1700,8 +1687,6 @@ static void
 tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
 {
    tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo);
-   tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data);
-   tu_bo_finish(cmd_buffer->device, &cmd_buffer->vsc_data2);
 
    list_del(&cmd_buffer->pool_link);
 
index df846dac63b4fc7073e81ad60b92a95380104344..b68f0f4540fe89d1350195377b95dd930143dbff 100644 (file)
@@ -1074,7 +1074,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
       if (!device->queues[qfi]) {
          result = VK_ERROR_OUT_OF_HOST_MEMORY;
-         goto fail;
+         goto fail_queues;
       }
 
       memset(device->queues[qfi], 0,
@@ -1086,13 +1086,27 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
          result = tu_queue_init(device, &device->queues[qfi][q], qfi, q,
                                 queue_create->flags);
          if (result != VK_SUCCESS)
-            goto fail;
+            goto fail_queues;
       }
    }
 
    device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id);
    if (!device->compiler)
-      goto fail;
+      goto fail_queues;
+
+#define VSC_DATA_SIZE(pitch)  ((pitch) * 32 + 0x100)  /* extra size to store VSC_SIZE */
+#define VSC_DATA2_SIZE(pitch) ((pitch) * 32)
+
+   device->vsc_data_pitch = 0x440 * 4;
+   device->vsc_data2_pitch = 0x1040 * 4;
+
+   result = tu_bo_init_new(device, &device->vsc_data, VSC_DATA_SIZE(device->vsc_data_pitch));
+   if (result != VK_SUCCESS)
+      goto fail_vsc_data;
+
+   result = tu_bo_init_new(device, &device->vsc_data2, VSC_DATA2_SIZE(device->vsc_data2_pitch));
+   if (result != VK_SUCCESS)
+      goto fail_vsc_data2;
 
    VkPipelineCacheCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
@@ -1104,14 +1118,23 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    result =
       tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
    if (result != VK_SUCCESS)
-      goto fail;
+      goto fail_pipeline_cache;
 
    device->mem_cache = tu_pipeline_cache_from_handle(pc);
 
    *pDevice = tu_device_to_handle(device);
    return VK_SUCCESS;
 
-fail:
+fail_pipeline_cache:
+   tu_bo_finish(device, &device->vsc_data2);
+
+fail_vsc_data2:
+   tu_bo_finish(device, &device->vsc_data);
+
+fail_vsc_data:
+   ralloc_free(device->compiler);
+
+fail_queues:
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++)
          tu_queue_finish(&device->queues[i][q]);
@@ -1119,9 +1142,6 @@ fail:
          vk_free(&device->alloc, device->queues[i]);
    }
 
-   if (device->compiler)
-      ralloc_free(device->compiler);
-
    vk_free(&device->alloc, device);
    return result;
 }
@@ -1134,6 +1154,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (!device)
       return;
 
+   tu_bo_finish(device, &device->vsc_data);
+   tu_bo_finish(device, &device->vsc_data2);
+
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++)
          tu_queue_finish(&device->queues[i][q]);
index 3a1bb331b9d288286d725a1fe22d1466a0a61d6c..e0e76bd179e1589c2c21323a05f7f93909724078 100644 (file)
@@ -453,6 +453,14 @@ struct tu_queue
    struct tu_fence submit_fence;
 };
 
+struct tu_bo
+{
+   uint32_t gem_handle;
+   uint64_t size;
+   uint64_t iova;
+   void *map;
+};
+
 struct tu_device
 {
    VK_LOADER_DATA _loader_data;
@@ -473,20 +481,17 @@ struct tu_device
    /* Backup in-memory cache to be used if the app doesn't provide one */
    struct tu_pipeline_cache *mem_cache;
 
+   struct tu_bo vsc_data;
+   struct tu_bo vsc_data2;
+   uint32_t vsc_data_pitch;
+   uint32_t vsc_data2_pitch;
+
    struct list_head shader_slabs;
    mtx_t shader_slab_mutex;
 
    struct tu_device_extension_table enabled_extensions;
 };
 
-struct tu_bo
-{
-   uint32_t gem_handle;
-   uint64_t size;
-   uint64_t iova;
-   void *map;
-};
-
 VkResult
 tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size);
 VkResult