From: Jason Ekstrand Date: Wed, 27 Feb 2019 00:05:34 +0000 (-0600) Subject: anv: Make all VkDeviceMemory BOs resident permanently X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=83b943cc2f2408087795ee2bd984477a1749a530;p=mesa.git anv: Make all VkDeviceMemory BOs resident permanently We spend a lot of time in the driver adding things to hash sets to track residency. The reality is that a properly built Vulkan app uses large memory objects and sub-allocates from them. In a typical frame, most of if not all of those allocations are going to be resident for the entire frame so we're really not saving ourselves much by tracking fine-grained residency. Just throwing everything in the validation list does make it a little bit more expensive inside the kernel to walk the list and ensure that all our VA is in order. However, without relocations, the overhead of that is pretty small. If we ever do run into a memory pressure situation where the fine- grained residency could even potentially help, we would likely be swapping one page out to make room for another within the draw call and performance is totally lost at that point. We're better off swapping out other apps and just letting ours run a whole frame. Reviewed-by: Lionel Landwerlin Reviewed-by: Caio Marcelo de Oliveira Filho --- diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index cf40d8b7123..098b1a39514 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1393,18 +1393,13 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0, &cmd_buffer->device->alloc); - /* Add the BOs for all the pinned buffers */ - if (cmd_buffer->device->pinned_buffers->entries) { - struct set *pinned_bos = _mesa_pointer_set_create(NULL); - if (pinned_bos == NULL) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - set_foreach(cmd_buffer->device->pinned_buffers, entry) { - const struct anv_buffer *buffer = entry->key; - _mesa_set_add(pinned_bos, buffer->address.bo); - } - anv_execbuf_add_bo_set(execbuf, pinned_bos, 0, - &cmd_buffer->device->alloc); - _mesa_set_destroy(pinned_bos, NULL); + /* Add the BOs for all memory objects */ + list_for_each_entry(struct anv_device_memory, mem, + &cmd_buffer->device->memory_objects, link) { + result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; } struct anv_block_pool *pool; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 991ca50f7dd..2af09d4f122 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -2005,6 +2005,8 @@ VkResult anv_CreateDevice( high_heap->size; } + list_inithead(&device->memory_objects); + /* As per spec, the driver implementation may deny requests to acquire * a priority above the default priority (MEDIUM) if the caller does not * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT @@ -2118,9 +2120,6 @@ VkResult anv_CreateDevice( if (device->info.gen >= 10) anv_device_init_hiz_clear_value_bo(device); - if (physical_device->use_softpin) - device->pinned_buffers = _mesa_pointer_set_create(NULL); - anv_scratch_pool_init(device, &device->scratch_pool); anv_queue_init(device, &device->queue); @@ -2211,9 +2210,6 @@ void anv_DestroyDevice( anv_queue_finish(&device->queue); - if (physical_device->use_softpin) - _mesa_set_destroy(device->pinned_buffers, NULL); - #ifdef HAVE_VALGRIND /* We only need to free these to prevent valgrind errors. The backing * BO will go away in a couple of lines so we don't actually leak. @@ -2698,6 +2694,10 @@ VkResult anv_AllocateMemory( } success: + pthread_mutex_lock(&device->mutex); + list_addtail(&mem->link, &device->memory_objects); + pthread_mutex_unlock(&device->mutex); + *pMem = anv_device_memory_to_handle(mem); return VK_SUCCESS; @@ -2789,6 +2789,10 @@ void anv_FreeMemory( if (mem == NULL) return; + pthread_mutex_lock(&device->mutex); + list_del(&mem->link); + pthread_mutex_unlock(&device->mutex); + if (mem->map) anv_UnmapMemory(_device, _mem); @@ -3324,12 +3328,6 @@ VkResult anv_CreateBuffer( buffer->usage = pCreateInfo->usage; buffer->address = ANV_NULL_ADDRESS; - if (buffer->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) { - pthread_mutex_lock(&device->mutex); - _mesa_set_add(device->pinned_buffers, buffer); - pthread_mutex_unlock(&device->mutex); - } - *pBuffer = anv_buffer_to_handle(buffer); return VK_SUCCESS; @@ -3346,12 +3344,6 @@ void anv_DestroyBuffer( if (!buffer) return; - if (buffer->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) { - pthread_mutex_lock(&device->mutex); - _mesa_set_remove_key(device->pinned_buffers, buffer); - pthread_mutex_unlock(&device->mutex); - } - vk_free2(&device->alloc, pAllocator, buffer); } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 80416ea8f81..bfeade7ce48 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1093,6 +1093,9 @@ struct anv_device { uint64_t vma_lo_available; uint64_t vma_hi_available; + /** List of all anv_device_memory objects */ + struct list_head memory_objects; + struct anv_bo_pool batch_bo_pool; struct anv_bo_cache bo_cache; @@ -1106,12 +1109,6 @@ struct anv_device { struct anv_bo trivial_batch_bo; struct anv_bo hiz_clear_bo; - /* Set of pointers to anv_buffer objects for all pinned buffers. Pinned - * buffers are always resident because they could be used at any time via - * VK_EXT_buffer_device_address. - */ - struct set * pinned_buffers; - struct anv_pipeline_cache default_pipeline_cache; struct blorp_context blorp; @@ -1483,6 +1480,8 @@ _anv_combine_address(struct anv_batch *batch, void *location, #define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS struct anv_device_memory { + struct list_head link; + struct anv_bo * bo; struct anv_memory_type * type; VkDeviceSize map_size; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b6d935c6901..a6d061e7ca0 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2045,6 +2045,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; + /* We only need to emit relocs if we're not using softpin. If we are using + * softpin then we always keep all user-allocated memory objects resident. + */ + const bool need_client_mem_relocs = + !cmd_buffer->device->instance->physicalDevice.use_softpin; + /* We only use push constant space for images before gen9 */ if (map->image_param_count > 0) { VkResult result = @@ -2122,8 +2128,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.compute.num_workgroups, 12, 1); bt_map[s] = surface_state.offset + state_offset; - add_surface_reloc(cmd_buffer, surface_state, - cmd_buffer->state.compute.num_workgroups); + if (need_client_mem_relocs) { + add_surface_reloc(cmd_buffer, surface_state, + cmd_buffer->state.compute.num_workgroups); + } continue; } else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) { /* This is a descriptor set buffer so the set index is actually @@ -2155,7 +2163,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, desc->image_view->planes[binding->plane].optimal_sampler_surface_state; surface_state = sstate.state; assert(surface_state.alloc_size); - add_surface_state_relocs(cmd_buffer, sstate); + if (need_client_mem_relocs) + add_surface_state_relocs(cmd_buffer, sstate); break; } case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: @@ -2170,7 +2179,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, desc->image_view->planes[binding->plane].optimal_sampler_surface_state; surface_state = sstate.state; assert(surface_state.alloc_size); - add_surface_state_relocs(cmd_buffer, sstate); + if (need_client_mem_relocs) + add_surface_state_relocs(cmd_buffer, sstate); } else { /* For color input attachments, we create the surface state at * vkBeginRenderPass time so that we can include aux and clear @@ -2189,7 +2199,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, : desc->image_view->planes[binding->plane].storage_surface_state; surface_state = sstate.state; assert(surface_state.alloc_size); - add_surface_state_relocs(cmd_buffer, sstate); + if (need_client_mem_relocs) + add_surface_state_relocs(cmd_buffer, sstate); if (devinfo->gen < 9) { /* We only need the image params on gen8 and earlier. No image * workarounds that require tiling information are required on @@ -2210,8 +2221,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: surface_state = desc->buffer_view->surface_state; assert(surface_state.alloc_size); - add_surface_reloc(cmd_buffer, surface_state, - desc->buffer_view->address); + if (need_client_mem_relocs) { + add_surface_reloc(cmd_buffer, surface_state, + desc->buffer_view->address); + } break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -2235,7 +2248,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, format, address, range, 1); - add_surface_reloc(cmd_buffer, surface_state, address); + if (need_client_mem_relocs) + add_surface_reloc(cmd_buffer, surface_state, address); break; } @@ -2244,8 +2258,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, ? desc->buffer_view->writeonly_storage_surface_state : desc->buffer_view->storage_surface_state; assert(surface_state.alloc_size); - add_surface_reloc(cmd_buffer, surface_state, - desc->buffer_view->address); + if (need_client_mem_relocs) { + add_surface_reloc(cmd_buffer, surface_state, + desc->buffer_view->address); + } if (devinfo->gen < 9) { assert(image < MAX_GEN8_IMAGES); struct brw_image_param *image_param =