#include "compiler/glsl_types.h"
#include "util/debug.h"
#include "util/disk_cache.h"
+#include "util/u_atomic.h"
#include "vk_format.h"
#include "vk_util.h"
device->mem_cache = tu_pipeline_cache_from_handle(pc);
+ for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
+ mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
+
*pDevice = tu_device_to_handle(device);
return VK_SUCCESS;
vk_free(&device->alloc, device->queues[i]);
}
+ for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
+ if (device->scratch_bos[i].initialized)
+ tu_bo_finish(device, &device->scratch_bos[i].bo);
+ }
+
/* the compiler does not use pAllocator */
ralloc_free(device->compiler);
vk_free(&device->alloc, device);
}
+VkResult
+tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
+{
+ unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
+ unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
+ assert(index < ARRAY_SIZE(dev->scratch_bos));
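+   /* For example, with MIN_SCRATCH_BO_SIZE_LOG2 == 12: a 5000-byte request
+    * rounds up to 8 KiB (size_log2 = 13, index = 1), while a 100-byte
+    * request is clamped to the 4 KiB minimum (size_log2 = 12, index = 0).
+    */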
+
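+   /* Any initialized BO at least as large as the request will do, so scan
+    * upward from the smallest size class that fits: a larger scratch BO can
+    * always stand in for a smaller one.
+    */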
+ for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
+ if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
+ /* Fast path: just return the already-allocated BO. */
+ *bo = &dev->scratch_bos[i].bo;
+ return VK_SUCCESS;
+ }
+ }
+
+   /* Slow path: actually allocate the BO. Allocation is slow, so we guard
+    * it with a per-size mutex: only one thread constructs a given BO, and
+    * threads waiting on other sizes aren't blocked while it finishes.
+    */
+ mtx_lock(&dev->scratch_bos[index].construct_mtx);
+
+   /* Another thread may have allocated the BO while we were waiting on the
+    * lock, so re-check under the lock to avoid allocating it twice.
+    */
+ if (dev->scratch_bos[index].initialized) {
+ mtx_unlock(&dev->scratch_bos[index].construct_mtx);
+ *bo = &dev->scratch_bos[index].bo;
+ return VK_SUCCESS;
+ }
+
+   uint64_t bo_size = 1ull << size_log2;
+ VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size);
+ if (result != VK_SUCCESS) {
+ mtx_unlock(&dev->scratch_bos[index].construct_mtx);
+ return result;
+ }
+
+ p_atomic_set(&dev->scratch_bos[index].initialized, true);
+
+ mtx_unlock(&dev->scratch_bos[index].construct_mtx);
+
+ *bo = &dev->scratch_bos[index].bo;
+ return VK_SUCCESS;
+}
+
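+/* A minimal usage sketch (illustrative only, not part of this patch): a
+ * hypothetical caller grabs a 64 KiB scratch BO for a short-lived staging
+ * write. The BO is owned and reused by the device, so the caller must not
+ * free it, and must be done with it before anything else asks for scratch.
+ */
+static VkResult
+example_scratch_user(struct tu_device *dev)
+{
+   struct tu_bo *scratch;
+   VkResult result = tu_get_scratch_bo(dev, 64 * 1024, &scratch);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Emit commands that write to and then read back from the BO here
+    * (assuming tu_bo exposes its GPU address, e.g. scratch->iova), then
+    * treat its contents as garbage afterwards.
+    */
+   return VK_SUCCESS;
+}
+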
VkResult
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
VkLayerProperties *pProperties)
uint32_t vsc_draw_strm_pitch;
uint32_t vsc_prim_strm_pitch;
+#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
+
+   /* Currently the kernel driver uses a 32-bit GPU address space, but a BO
+    * size should never exceed 48 bits, so 48 bounds the number of
+    * power-of-two size classes we need.
+    */
+ struct {
+ struct tu_bo bo;
+ mtx_t construct_mtx;
+ bool initialized;
+ } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
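+   /* With MIN_SCRATCH_BO_SIZE_LOG2 == 12 this gives 36 slots, one per
+    * power-of-two size class from 4 KiB (1 << 12) up to 128 TiB (1 << 47).
+    */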
+
struct tu_bo border_color;
struct list_head shader_slabs;
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
+/* Get a scratch bo for use inside a command buffer. This always returns the
+ * same bo for the same (or a similar) size, since sizes are rounded up to a
+ * power of two, so at most one scratch bo can be in use at a time. It's
+ * meant for short-lived data: write to some piece of memory, read it back,
+ * and then immediately discard it.
+ */
+VkResult
+tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
+
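+/* Illustrative sketch of the aliasing guarantee above (assumed behavior,
+ * based on the power-of-two bucketing in tu_get_scratch_bo): requests that
+ * round up to the same size class receive the same BO.
+ *
+ *    struct tu_bo *a, *b;
+ *    tu_get_scratch_bo(dev, 5000, &a);  // rounds up to the 8 KiB class
+ *    tu_get_scratch_bo(dev, 8192, &b);  // same class
+ *    assert(a == b);                    // hence "only one at a time"
+ */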
struct tu_cs_entry
{
/* No ownership */