From ed79f805faf1ac5919a30d3284e37cc3f394e464 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 11 May 2020 18:46:04 +0200 Subject: [PATCH] tu: Add a "scratch bo" allocation mechanism This is simpler than a full-blown memory reuse mechanism, but is good enough to make sure that repeatedly doing a copy that requires the linear staging buffer workaround won't use excessive memory or be slowed down due to repeated allocations. Part-of: --- src/freedreno/vulkan/tu_device.c | 54 +++++++++++++++++++++++++++++++ src/freedreno/vulkan/tu_private.h | 20 ++++++++++++ 2 files changed, 74 insertions(+) diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 668f4e79d3e..dfe4df85e32 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -39,6 +39,7 @@ #include "compiler/glsl_types.h" #include "util/debug.h" #include "util/disk_cache.h" +#include "util/u_atomic.h" #include "vk_format.h" #include "vk_util.h" @@ -1256,6 +1257,9 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, device->mem_cache = tu_pipeline_cache_from_handle(pc); + for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) + mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain); + *pDevice = tu_device_to_handle(device); return VK_SUCCESS; @@ -1302,6 +1306,11 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) vk_free(&device->alloc, device->queues[i]); } + for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) { + if (device->scratch_bos[i].initialized) + tu_bo_finish(device, &device->scratch_bos[i].bo); + } + /* the compiler does not use pAllocator */ ralloc_free(device->compiler); @@ -1311,6 +1320,51 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) vk_free(&device->alloc, device); } +VkResult +tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo) +{ + unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2); + unsigned index 
= size_log2 - MIN_SCRATCH_BO_SIZE_LOG2; + assert(index < ARRAY_SIZE(dev->scratch_bos)); + + for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) { + if (p_atomic_read(&dev->scratch_bos[i].initialized)) { + /* Fast path: just return the already-allocated BO. */ + *bo = &dev->scratch_bos[i].bo; + return VK_SUCCESS; + } + } + + /* Slow path: actually allocate the BO. We take a lock because the process + * of allocating it is slow, and we don't want to block the CPU while it + * finishes, only other allocations of the same scratch BO. + */ + mtx_lock(&dev->scratch_bos[index].construct_mtx); + + /* Another thread may have allocated it already while we were waiting on + * the lock. We need to check this in order to avoid double-allocating. + */ + if (dev->scratch_bos[index].initialized) { + mtx_unlock(&dev->scratch_bos[index].construct_mtx); + *bo = &dev->scratch_bos[index].bo; + return VK_SUCCESS; + } + + unsigned bo_size = 1ull << size_log2; + VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size); + if (result != VK_SUCCESS) { + mtx_unlock(&dev->scratch_bos[index].construct_mtx); + return result; + } + + p_atomic_set(&dev->scratch_bos[index].initialized, true); + + mtx_unlock(&dev->scratch_bos[index].construct_mtx); + + *bo = &dev->scratch_bos[index].bo; + return VK_SUCCESS; +} + VkResult tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties) diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 99da10a568b..1c32e60000a 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -509,6 +509,17 @@ struct tu_device uint32_t vsc_draw_strm_pitch; uint32_t vsc_prim_strm_pitch; +#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */ + + /* Currently the kernel driver uses a 32-bit GPU address space, but it + * should be impossible to go beyond 48 bits. 
+ */ + struct { + struct tu_bo bo; + mtx_t construct_mtx; + bool initialized; + } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2]; + struct tu_bo border_color; struct list_head shader_slabs; @@ -531,6 +542,15 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo); VkResult tu_bo_map(struct tu_device *dev, struct tu_bo *bo); +/* Get a scratch bo for use inside a command buffer. This will always return + * the same bo given the same size or similar sizes, so only one scratch bo + * can be used at the same time. It's meant for short-lived things where we + * need to write to some piece of memory, read from it, and then immediately + * discard it. + */ +VkResult +tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo); + struct tu_cs_entry { /* No ownership */ -- 2.30.2