From a1bd426393fb24a53d2cb180b21db4473ed71330 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Kristian=20H=C3=B8gsberg?=
Date: Tue, 19 May 2015 14:14:24 -0700
Subject: [PATCH] vk: Stream surface state instead of using the surface pool

Since the binding table pointer is only 16 bits, we can only have 64kb
of binding table state allocated at any given time. With a block size of
1kb, that amounts to just 64 command buffers, which is not enough.
---
 src/vulkan/aub.c     |   4 +-
 src/vulkan/device.c  | 112 ++++++++++++++++++++++++++++++++-----------
 src/vulkan/meta.c    |  25 ++++++++--
 src/vulkan/private.h |   4 +-
 4 files changed, 112 insertions(+), 33 deletions(-)

diff --git a/src/vulkan/aub.c b/src/vulkan/aub.c
index 5e66aa839e3..bb4772a39ea 100644
--- a/src/vulkan/aub.c
+++ b/src/vulkan/aub.c
@@ -261,8 +261,8 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer)
    }
 
    relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos);
-   relocate_bo(&device->surface_state_block_pool.bo,
-               &batch->surf_relocs, aub_bos);
+   relocate_bo(&cmd_buffer->surface_bo,
+               &cmd_buffer->surface_relocs, aub_bos);
 
    for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) {
       bo = cmd_buffer->exec2_bos[i];
diff --git a/src/vulkan/device.c b/src/vulkan/device.c
index 3c245cd036b..64a2e492e29 100644
--- a/src/vulkan/device.c
+++ b/src/vulkan/device.c
@@ -481,15 +481,20 @@ anv_batch_init(struct anv_batch *batch, struct anv_device *device)
    batch->bo.map =
       anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE);
    if (batch->bo.map == NULL) {
-      anv_gem_close(device, batch->bo.gem_handle);
-      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+      result = vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+      goto fail_bo;
    }
 
    batch->cmd_relocs.num_relocs = 0;
-   batch->surf_relocs.num_relocs = 0;
    batch->next = batch->bo.map;
 
    return VK_SUCCESS;
+
+ fail_bo:
+   anv_gem_close(device, batch->bo.gem_handle);
+
+   return result;
+
 }
 
 void
@@ -504,7 +509,6 @@ anv_batch_reset(struct anv_batch *batch)
 {
    batch->next = batch->bo.map;
    batch->cmd_relocs.num_relocs = 0;
-   batch->surf_relocs.num_relocs = 0;
 }
 
 void *
@@ -568,7 +572,6 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
 
    offset = batch->next - batch->bo.map;
    anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset);
-   anv_reloc_list_append(&batch->surf_relocs, &other->surf_relocs, offset);
 
    batch->next += size;
 }
@@ -926,6 +929,8 @@ anv_cmd_buffer_destructor(struct anv_device * device,
 {
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
 
+   anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE);
+   anv_gem_close(device, cmd_buffer->surface_bo.gem_handle);
    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
    anv_state_stream_finish(&cmd_buffer->binding_table_state_stream);
@@ -2073,12 +2078,27 @@ VkResult anv_CreateCommandBuffer(
    if (result != VK_SUCCESS)
      goto fail;
 
+   result = anv_bo_init_new(&cmd_buffer->surface_bo, device, BATCH_SIZE);
+   if (result != VK_SUCCESS)
+      goto fail_batch;
+
+   cmd_buffer->surface_bo.map =
+      anv_gem_mmap(device, cmd_buffer->surface_bo.gem_handle, 0, BATCH_SIZE);
+   if (cmd_buffer->surface_bo.map == NULL) {
+      result = vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+      goto fail_surface_bo;
+   }
+
+   /* Start surface_next at 1 so surface offset 0 is invalid. */
+   cmd_buffer->surface_next = 1;
+   cmd_buffer->surface_relocs.num_relocs = 0;
+
    cmd_buffer->exec2_objects =
       anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8,
                        VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
    if (cmd_buffer->exec2_objects == NULL) {
       result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-      goto fail_batch;
+      goto fail_surface_map;
    }
 
    cmd_buffer->exec2_bos =
@@ -2105,6 +2125,10 @@ VkResult anv_CreateCommandBuffer(
 
  fail_exec2_objects:
    anv_device_free(device, cmd_buffer->exec2_objects);
+ fail_surface_map:
+   anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE);
+ fail_surface_bo:
+   anv_gem_close(device, cmd_buffer->surface_bo.gem_handle);
  fail_batch:
    anv_batch_finish(&cmd_buffer->batch, device);
  fail:
@@ -2130,7 +2154,7 @@ VkResult anv_BeginCommandBuffer(
       .GeneralStateBufferSize = 0xfffff,
       .GeneralStateBufferSizeModifyEnable = true,
 
-      .SurfaceStateBaseAddress = { &device->surface_state_block_pool.bo, 0 },
+      .SurfaceStateBaseAddress = { &cmd_buffer->surface_bo, 0 },
       .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */
       .SurfaceStateBaseAddressModifyEnable = true,
 
@@ -2277,13 +2301,13 @@ VkResult anv_EndCommandBuffer(
    pthread_mutex_lock(&device->mutex);
 
    /* Add block pool bos first so we can add them with their relocs. */
-   anv_cmd_buffer_add_bo(cmd_buffer, &device->surface_state_block_pool.bo,
-                         &batch->surf_relocs);
+   anv_cmd_buffer_add_bo(cmd_buffer, &cmd_buffer->surface_bo,
+                         &cmd_buffer->surface_relocs);
 
-   anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->surf_relocs);
+   anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
    anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs);
    anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs);
-   anv_cmd_buffer_process_relocs(cmd_buffer, &batch->surf_relocs);
+   anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
    anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs);
 
    cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
@@ -2313,6 +2337,8 @@ VkResult anv_ResetCommandBuffer(
    struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
 
    anv_batch_reset(&cmd_buffer->batch);
+   cmd_buffer->surface_next = 1;
+   cmd_buffer->surface_relocs.num_relocs = 0;
 
    return VK_SUCCESS;
 }
@@ -2363,6 +2389,22 @@ void anv_CmdBindDynamicStateObject(
    };
 }
 
+static struct anv_state
+anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
+                                   uint32_t size, uint32_t alignment)
+{
+   struct anv_state state;
+
+   state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment);
+   state.map = cmd_buffer->surface_bo.map + state.offset;
+   state.alloc_size = size;
+   cmd_buffer->surface_next = state.offset + size;
+
+   assert(state.offset + size < cmd_buffer->surface_bo.size);
+
+   return state;
+}
+
 void anv_CmdBindDescriptorSets(
     VkCmdBuffer                                 cmdBuffer,
     VkPipelineBindPoint                         pipelineBindPoint,
@@ -2392,8 +2434,11 @@ void anv_CmdBindDescriptorSets(
          for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) {
             struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view;
 
-            bindings->descriptors[s].surfaces[start + b] =
-               view->surface_state.offset;
+            struct anv_state state =
+               anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+            memcpy(state.map, view->surface_state.map, 64);
+
+            bindings->descriptors[s].surfaces[start + b] = state.offset;
             bindings->descriptors[s].relocs[start + b].bo = view->bo;
             bindings->descriptors[s].relocs[start + b].offset = view->offset;
          }
@@ -2480,24 +2525,33 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
 
       if (layers + surface_count > 0) {
          struct anv_state state;
+         uint32_t offset;
+         uint32_t *address;
          uint32_t size;
 
          size = (bias + surface_count) * sizeof(uint32_t);
-         state = anv_state_stream_alloc(&cmd_buffer->binding_table_state_stream,
-                                        size, 32);
+         state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
         memcpy(state.map, bindings->descriptors[s].surfaces, size);
 
-         for (uint32_t i = 0; i < layers; i++)
-            anv_reloc_list_add(&cmd_buffer->batch.surf_relocs,
-                               bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t),
-                               bindings->descriptors[s].relocs[i].bo,
-                               bindings->descriptors[s].relocs[i].offset);
+         for (uint32_t i = 0; i < layers; i++) {
+            offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t);
+            address = cmd_buffer->surface_bo.map + offset;
+
+            *address =
+               anv_reloc_list_add(&cmd_buffer->surface_relocs, offset,
+                                  bindings->descriptors[s].relocs[i].bo,
+                                  bindings->descriptors[s].relocs[i].offset);
+         }
+
+         for (uint32_t i = 0; i < surface_count; i++) {
+            offset = bindings->descriptors[s].surfaces[bias + i] + 8 * sizeof(int32_t);
+            address = cmd_buffer->surface_bo.map + offset;
 
-         for (uint32_t i = 0; i < surface_count; i++)
-            anv_reloc_list_add(&cmd_buffer->batch.surf_relocs,
-                               bindings->descriptors[s].surfaces[bias + i] + 8 * sizeof(int32_t),
-                               bindings->descriptors[s].relocs[bias + i].bo,
-                               bindings->descriptors[s].relocs[bias + i].offset);
+            *address =
+               anv_reloc_list_add(&cmd_buffer->surface_relocs, offset,
+                                  bindings->descriptors[s].relocs[bias + i].bo,
+                                  bindings->descriptors[s].relocs[bias + i].offset);
+         }
 
          static const uint32_t binding_table_opcodes[] = {
             [VK_SHADER_STAGE_VERTEX] = 38,
@@ -2519,7 +2573,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
          size_t size;
 
          size = layout->stage[s].sampler_count * 16;
-         state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
+         state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
          memcpy(state.map, bindings->descriptors[s].samplers, size);
 
          static const uint32_t sampler_state_opcodes[] = {
@@ -3086,7 +3140,11 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer)
    for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) {
       struct anv_surface_view *view = framebuffer->color_attachments[i];
 
-      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = view->surface_state.offset;
+      struct anv_state state =
+         anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
+      memcpy(state.map, view->surface_state.map, 64);
+
+      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = state.offset;
       bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo;
       bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset;
    }
diff --git a/src/vulkan/meta.c b/src/vulkan/meta.c
index b088e777dd2..976a0ee3b6a 100644
--- a/src/vulkan/meta.c
+++ b/src/vulkan/meta.c
@@ -174,6 +174,27 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer,
       ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
 }
 
+static void
+anv_cmd_buffer_copy_render_targets(struct anv_cmd_buffer *cmd_buffer,
+                                   struct anv_saved_state *state)
+{
+   struct anv_framebuffer *fb = cmd_buffer->framebuffer;
+   struct anv_bindings *old_bindings = state->old_bindings;
+   struct anv_bindings *bindings = cmd_buffer->bindings;
+
+   for (uint32_t i = 0; i < fb->color_attachment_count; i++) {
+      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] =
+         old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i];
+
+      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo =
+         old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo;
+      bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset =
+         old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset;
+   }
+
+   cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
+}
+
 struct vue_header {
    uint32_t Reserved;
    uint32_t RTAIndex;
@@ -241,9 +262,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer,
    };
 
    anv_cmd_buffer_save(cmd_buffer, &saved_state);
-
-   /* Initialize render targets for the meta bindings. */
-   anv_cmd_buffer_fill_render_targets(cmd_buffer);
+   anv_cmd_buffer_copy_render_targets(cmd_buffer, &saved_state);
 
    anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2,
       (VkBuffer[]) {
diff --git a/src/vulkan/private.h b/src/vulkan/private.h
index e47d5556a9e..e4534f26e8c 100644
--- a/src/vulkan/private.h
+++ b/src/vulkan/private.h
@@ -368,7 +368,6 @@ struct anv_batch {
    struct anv_bo                                bo;
    void *                                       next;
    struct anv_reloc_list                        cmd_relocs;
-   struct anv_reloc_list                        surf_relocs;
 };
 
 VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device);
@@ -549,6 +548,9 @@ struct anv_cmd_buffer {
 
    uint32_t                                     bo_count;
    struct anv_batch                             batch;
+   struct anv_bo                                surface_bo;
+   uint32_t                                     surface_next;
+   struct anv_reloc_list                        surface_relocs;
    struct anv_state_stream                      binding_table_state_stream;
    struct anv_state_stream                      surface_state_stream;
    struct anv_state_stream                      dynamic_state_stream;
-- 
2.30.2
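
Note on the sizing argument in the commit message: a 16-bit binding table pointer can
address at most 1 << 16 = 65536 bytes (64 KB) relative to Surface State Base Address,
so a shared pool carved into 1 KB blocks tops out at 64 blocks, i.e. 64 command
buffers. The patch sidesteps the shared pool by pointing Surface State Base Address at
a per-command-buffer surface_bo and bump-allocating surface state from surface_next,
which starts at 1 so that offset 0 stays invalid. The standalone C sketch below only
illustrates that allocation pattern; it is not driver code, and everything beyond the
surface_bo/surface_next idea (the sketch_cmd_buffer struct, alloc_surface_state,
align_u32, and the sizes used) is an illustrative assumption.

/* Standalone illustration only -- not part of the patch or the anv driver. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A 16-bit binding table pointer addresses at most 1 << 16 = 65536 bytes
 * relative to Surface State Base Address. */
#define SURFACE_BO_SIZE (1u << 16)

/* Hypothetical stand-in for the relevant anv_cmd_buffer fields. */
struct sketch_cmd_buffer {
   uint8_t  *surface_map;   /* plays the role of surface_bo.map */
   uint32_t  surface_next;  /* next free offset; starts at 1, not 0 */
};

static uint32_t
align_u32(uint32_t value, uint32_t alignment)
{
   /* alignment must be a power of two */
   return (value + alignment - 1) & ~(alignment - 1);
}

/* Mirrors the shape of anv_cmd_buffer_alloc_surface_state in the patch:
 * round surface_next up to the requested alignment, reserve `size` bytes,
 * and hand back the offset into the per-command-buffer surface bo. */
static uint32_t
alloc_surface_state(struct sketch_cmd_buffer *cmd, uint32_t size, uint32_t alignment)
{
   uint32_t offset = align_u32(cmd->surface_next, alignment);

   assert(offset + size < SURFACE_BO_SIZE);
   cmd->surface_next = offset + size;

   return offset;
}

int
main(void)
{
   struct sketch_cmd_buffer cmd = {
      .surface_map  = calloc(1, SURFACE_BO_SIZE),
      .surface_next = 1,   /* offset 0 is reserved as "invalid" */
   };
   if (cmd.surface_map == NULL)
      return 1;

   /* With the old shared pool, 64 KB / 1 KB blocks = 64 command buffers at most. */
   printf("blocks in a shared 64 KB pool: %u\n", SURFACE_BO_SIZE / 1024);

   /* Stream one 64-byte, 64-byte-aligned surface state entry, as the patch
    * does when binding descriptor sets and render targets. */
   uint8_t surface_state_template[64] = {0};
   uint32_t offset = alloc_surface_state(&cmd, sizeof(surface_state_template), 64);
   memcpy(cmd.surface_map + offset, surface_state_template, sizeof(surface_state_template));

   printf("first surface state streamed at offset %u\n", offset);

   free(cmd.surface_map);
   return 0;
}

Because each command buffer owns its 64 KB of surface state, the number of command
buffers that can hold allocated state at once is no longer capped by one shared pool.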