queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}
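+/* Device-global list of buffer objects. Every VkDeviceMemory allocation is
+ * added to this list, and the list is passed to the kernel on each
+ * submission so that all resident allocations are accessible to the GPU.
+ * Guarded by its own mutex. */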
+static void
+radv_bo_list_init(struct radv_bo_list *bo_list)
+{
+ pthread_mutex_init(&bo_list->mutex, NULL);
+ bo_list->list.count = bo_list->capacity = 0;
+ bo_list->list.bos = NULL;
+}
+
+static void
+radv_bo_list_finish(struct radv_bo_list *bo_list)
+{
+ free(bo_list->list.bos);
+ pthread_mutex_destroy(&bo_list->mutex);
+}
+
+static VkResult
+radv_bo_list_add(struct radv_bo_list *bo_list, struct radeon_winsys_bo *bo)
+{
+ pthread_mutex_lock(&bo_list->mutex);
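+ /* Grow the array geometrically (4, 8, 16, ...) so repeated adds stay
+ * amortized O(1). */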
+ if (bo_list->list.count == bo_list->capacity) {
+ unsigned capacity = MAX2(4, bo_list->capacity * 2);
+ void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
+
+ if (!data) {
+ pthread_mutex_unlock(&bo_list->mutex);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ bo_list->list.bos = (struct radeon_winsys_bo**)data;
+ bo_list->capacity = capacity;
+ }
+
+ bo_list->list.bos[bo_list->list.count++] = bo;
+ pthread_mutex_unlock(&bo_list->mutex);
+ return VK_SUCCESS;
+}
+
+static void
+radv_bo_list_remove(struct radv_bo_list *bo_list, struct radeon_winsys_bo *bo)
+{
+ pthread_mutex_lock(&bo_list->mutex);
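+ /* Unordered remove: overwrite the entry with the last element and shrink
+ * the count; list order is irrelevant for submission. */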
+ for (unsigned i = 0; i < bo_list->list.count; ++i) {
+ if (bo_list->list.bos[i] == bo) {
+ bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
+ --bo_list->list.count;
+ break;
+ }
+ }
+ pthread_mutex_unlock(&bo_list->mutex);
+}
+
static void
radv_device_init_gs_info(struct radv_device *device)
{
mtx_init(&device->shader_slab_mutex, mtx_plain);
list_inithead(&device->shader_slabs);
+ radv_bo_list_init(&device->bo_list);
+
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
uint32_t qfi = queue_create->queueFamilyIndex;
fail_meta:
radv_device_finish_meta(device);
fail:
+ radv_bo_list_finish(&device->bo_list);
+
if (device->trace_bo)
device->ws->buffer_destroy(device->trace_bo);
radv_destroy_shader_slabs(device);
+ radv_bo_list_finish(&device->bo_list);
vk_free(&device->alloc, device);
}
ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL, &sem_info,
+ 1, NULL, NULL, &sem_info, NULL,
false, fence->fence);
radv_free_sem_info(&sem_info);
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
1, NULL, NULL,
- &sem_info,
+ &sem_info, NULL,
false, base_fence);
if (ret) {
radv_loge("failed to submit CS %d\n", i);
sem_info.cs_emit_wait = j == 0;
sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
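+ /* Hold the BO list mutex across the submission so the list cannot be
+ * resized or mutated while the winsys walks it. */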
+ pthread_mutex_lock(&queue->device->bo_list.mutex);
+
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
advance, initial_preamble, continue_preamble_cs,
- &sem_info,
+ &sem_info, &queue->device->bo_list.list,
can_patch, base_fence);
+ pthread_mutex_unlock(&queue->device->bo_list.mutex);
+
if (ret) {
radv_loge("failed to submit CS %d\n", i);
abort();
goto fail;
} else {
close(import_info->fd);
- goto out_success;
}
- }
-
- if (host_ptr_info) {
+ } else if (host_ptr_info) {
assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
goto fail;
} else {
mem->user_ptr = host_ptr_info->pHostPointer;
- goto out_success;
}
- }
-
- uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
- if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
- mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
- domain = RADEON_DOMAIN_GTT;
- else
- domain = RADEON_DOMAIN_VRAM;
+ } else {
+ uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
+ if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
+ mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
+ domain = RADEON_DOMAIN_GTT;
+ else
+ domain = RADEON_DOMAIN_VRAM;
- if (mem_type_index == RADV_MEM_TYPE_VRAM)
- flags |= RADEON_FLAG_NO_CPU_ACCESS;
- else
- flags |= RADEON_FLAG_CPU_ACCESS;
+ if (mem_type_index == RADV_MEM_TYPE_VRAM)
+ flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ else
+ flags |= RADEON_FLAG_CPU_ACCESS;
- if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
- flags |= RADEON_FLAG_GTT_WC;
+ if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+ flags |= RADEON_FLAG_GTT_WC;
- if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
- flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
+ flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
- mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
- domain, flags);
+ mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
+ domain, flags);
- if (!mem->bo) {
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail;
+ if (!mem->bo) {
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail;
+ }
+ mem->type_index = mem_type_index;
}
- mem->type_index = mem_type_index;
-out_success:
+
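+ /* Track the new allocation in the device-global BO list so it is included
+ * in subsequent submissions. */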
+ result = radv_bo_list_add(&device->bo_list, mem->bo);
+ if (result != VK_SUCCESS)
+ goto fail_bo;
+
*pMem = radv_device_memory_to_handle(mem);
return VK_SUCCESS;
+fail_bo:
+ device->ws->buffer_destroy(mem->bo);
fail:
vk_free2(&device->alloc, pAllocator, mem);
if (mem == NULL)
return;
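+ /* Unregister the BO from the global list before destroying it. */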
+ radv_bo_list_remove(&device->bo_list, mem->bo);
device->ws->buffer_destroy(mem->bo);
mem->bo = NULL;
queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
1, NULL, NULL,
- &sem_info,
+ &sem_info, NULL,
false, base_fence);
fence_emitted = true;
if (fence)
struct radeon_winsys_cs *continue_preamble_cs;
};
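+/* Device-global BO list: `list` is the flat view handed to the winsys at
+ * submit time, `capacity` is the allocated size of list.bos, and `mutex`
+ * guards both. */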
+struct radv_bo_list {
+ struct radv_winsys_bo_list list;
+ unsigned capacity;
+ pthread_mutex_t mutex;
+};
+
struct radv_device {
VK_LOADER_DATA _loader_data;
uint64_t dmesg_timestamp;
struct radv_device_extension_table enabled_extensions;
+
+ struct radv_bo_list bo_list;
};
struct radv_device_memory {
struct radv_winsys_sem_counts signal;
};
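+/* Flat array of BOs handed from the driver to the winsys for submission. */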
+struct radv_winsys_bo_list {
+ struct radeon_winsys_bo **bos;
+ unsigned count;
+};
+
struct radeon_winsys {
void (*destroy)(struct radeon_winsys *ws);
struct radeon_winsys_cs *initial_preamble_cs,
struct radeon_winsys_cs *continue_preamble_cs,
struct radv_winsys_sem_info *sem_info,
+ const struct radv_winsys_bo_list *bo_list, /* optional */
bool can_patch,
struct radeon_winsys_fence *fence);
unsigned count,
struct radv_amdgpu_winsys_bo *extra_bo,
struct radeon_winsys_cs *extra_cs,
+ const struct radv_winsys_bo_list *radv_bo_list,
amdgpu_bo_list_handle *bo_list)
{
int r = 0;
bo_list);
free(handles);
pthread_mutex_unlock(&ws->global_bo_list_lock);
- } else if (count == 1 && !extra_bo && !extra_cs &&
+ } else if (count == 1 && !extra_bo && !extra_cs && !radv_bo_list &&
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
if (cs->num_buffers == 0) {
if (extra_cs) {
total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
}
+
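+ /* Also reserve room for the device-global BO list. */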
+ if (radv_bo_list) {
+ total_buffer_count += radv_bo_list->count;
+ }
+
if (total_buffer_count == 0) {
*bo_list = 0;
return 0;
}
}
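+ /* Merge the global BO list into the kernel BO list. BOs already collected
+ * from the CSes get their priority raised to at least the default; the
+ * rest are appended. Deduplication only checks the first unique_bo_so_far
+ * handles, i.e. the global list itself is assumed to be duplicate-free. */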
+ if (radv_bo_list) {
+ unsigned unique_bo_so_far = unique_bo_count;
+ const unsigned default_bo_priority = 7;
+ for (unsigned i = 0; i < radv_bo_list->count; ++i) {
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(radv_bo_list->bos[i]);
+ bool found = false;
+ for (unsigned j = 0; j < unique_bo_so_far; ++j) {
+ if (bo->bo == handles[j]) {
+ found = true;
+ priorities[j] = MAX2(priorities[j], default_bo_priority);
+ break;
+ }
+ }
+ if (!found) {
+ handles[unique_bo_count] = bo->bo;
+ priorities[unique_bo_count] = default_bo_priority;
+ ++unique_bo_count;
+ }
+ }
+ }
+
if (unique_bo_count > 0) {
r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
priorities, bo_list);
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
int queue_idx,
struct radv_winsys_sem_info *sem_info,
+ const struct radv_winsys_bo_list *radv_bo_list,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
struct radeon_winsys_cs *initial_preamble_cs,
}
}
- r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
+ r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs,
+ radv_bo_list, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed for the "
"chained submission(%d)\n", r);
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
int queue_idx,
struct radv_winsys_sem_info *sem_info,
+ const struct radv_winsys_bo_list *radv_bo_list,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
struct radeon_winsys_cs *initial_preamble_cs,
memset(&request, 0, sizeof(request));
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
- preamble_cs, &bo_list);
+ preamble_cs, radv_bo_list, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed "
"for the fallback submission (%d)\n", r);
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
int queue_idx,
struct radv_winsys_sem_info *sem_info,
+ const struct radv_winsys_bo_list *radv_bo_list,
struct radeon_winsys_cs **cs_array,
unsigned cs_count,
struct radeon_winsys_cs *initial_preamble_cs,
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
(struct radv_amdgpu_winsys_bo*)bo,
- preamble_cs, &bo_list);
+ preamble_cs, radv_bo_list, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed "
"for the sysmem submission (%d)\n", r);
struct radeon_winsys_cs *initial_preamble_cs,
struct radeon_winsys_cs *continue_preamble_cs,
struct radv_winsys_sem_info *sem_info,
+ const struct radv_winsys_bo_list *bo_list,
bool can_patch,
struct radeon_winsys_fence *_fence)
{
assert(sem_info);
if (!cs->ws->use_ib_bos) {
- ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array,
+ ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
- ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array,
+ ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else {
- ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array,
+ ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, bo_list, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
}