anv: Make all VkDeviceMemory BOs resident permanently
authorJason Ekstrand <jason.ekstrand@intel.com>
Wed, 27 Feb 2019 00:05:34 +0000 (18:05 -0600)
committerJason Ekstrand <jason@jlekstrand.net>
Fri, 19 Apr 2019 19:56:42 +0000 (19:56 +0000)
We spend a lot of time in the driver adding things to hash sets to track
residency.  The reality is that a properly built Vulkan app uses large
memory objects and sub-allocates from them.  In a typical frame, most of
if not all of those allocations are going to be resident for the entire
frame so we're really not saving ourselves much by tracking fine-grained
residency.  Just throwing everything in the validation list does make it
a little bit more expensive inside the kernel to walk the list and
ensure that all our VA is in order.  However, without relocations, the
overhead of that is pretty small.

If we ever do run into a memory pressure situation where the fine-
grained residency could even potentially help, we would likely be
swapping one page out to make room for another within the draw call and
performance is totally lost at that point.  We're better off swapping
out other apps and just letting ours run a whole frame.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c

index cf40d8b7123877354587575827994ac156f5abeb..098b1a39514a3e2893f279c11c6e5e73beb5f6ed 100644 (file)
@@ -1393,18 +1393,13 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
       anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
                              &cmd_buffer->device->alloc);
 
-      /* Add the BOs for all the pinned buffers */
-      if (cmd_buffer->device->pinned_buffers->entries) {
-         struct set *pinned_bos = _mesa_pointer_set_create(NULL);
-         if (pinned_bos == NULL)
-            return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-         set_foreach(cmd_buffer->device->pinned_buffers, entry) {
-            const struct anv_buffer *buffer = entry->key;
-            _mesa_set_add(pinned_bos, buffer->address.bo);
-         }
-         anv_execbuf_add_bo_set(execbuf, pinned_bos, 0,
-                                &cmd_buffer->device->alloc);
-         _mesa_set_destroy(pinned_bos, NULL);
+      /* Add the BOs for all memory objects */
+      list_for_each_entry(struct anv_device_memory, mem,
+                          &cmd_buffer->device->memory_objects, link) {
+         result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
+                                     &cmd_buffer->device->alloc);
+         if (result != VK_SUCCESS)
+            return result;
       }
 
       struct anv_block_pool *pool;
index 991ca50f7dd3d36fe454bcb11bf60818cd59b50a..2af09d4f1227bcd6f2887b52b02f60c47bf033ba 100644 (file)
@@ -2005,6 +2005,8 @@ VkResult anv_CreateDevice(
          high_heap->size;
    }
 
+   list_inithead(&device->memory_objects);
+
    /* As per spec, the driver implementation may deny requests to acquire
     * a priority above the default priority (MEDIUM) if the caller does not
     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
@@ -2118,9 +2120,6 @@ VkResult anv_CreateDevice(
    if (device->info.gen >= 10)
       anv_device_init_hiz_clear_value_bo(device);
 
-   if (physical_device->use_softpin)
-      device->pinned_buffers = _mesa_pointer_set_create(NULL);
-
    anv_scratch_pool_init(device, &device->scratch_pool);
 
    anv_queue_init(device, &device->queue);
@@ -2211,9 +2210,6 @@ void anv_DestroyDevice(
 
    anv_queue_finish(&device->queue);
 
-   if (physical_device->use_softpin)
-      _mesa_set_destroy(device->pinned_buffers, NULL);
-
 #ifdef HAVE_VALGRIND
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
@@ -2698,6 +2694,10 @@ VkResult anv_AllocateMemory(
    }
 
  success:
+   pthread_mutex_lock(&device->mutex);
+   list_addtail(&mem->link, &device->memory_objects);
+   pthread_mutex_unlock(&device->mutex);
+
    *pMem = anv_device_memory_to_handle(mem);
 
    return VK_SUCCESS;
@@ -2789,6 +2789,10 @@ void anv_FreeMemory(
    if (mem == NULL)
       return;
 
+   pthread_mutex_lock(&device->mutex);
+   list_del(&mem->link);
+   pthread_mutex_unlock(&device->mutex);
+
    if (mem->map)
       anv_UnmapMemory(_device, _mem);
 
@@ -3324,12 +3328,6 @@ VkResult anv_CreateBuffer(
    buffer->usage = pCreateInfo->usage;
    buffer->address = ANV_NULL_ADDRESS;
 
-   if (buffer->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) {
-      pthread_mutex_lock(&device->mutex);
-      _mesa_set_add(device->pinned_buffers, buffer);
-      pthread_mutex_unlock(&device->mutex);
-   }
-
    *pBuffer = anv_buffer_to_handle(buffer);
 
    return VK_SUCCESS;
@@ -3346,12 +3344,6 @@ void anv_DestroyBuffer(
    if (!buffer)
       return;
 
-   if (buffer->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) {
-      pthread_mutex_lock(&device->mutex);
-      _mesa_set_remove_key(device->pinned_buffers, buffer);
-      pthread_mutex_unlock(&device->mutex);
-   }
-
    vk_free2(&device->alloc, pAllocator, buffer);
 }
 
index 80416ea8f818cb2c5c22a0941e143ca3ea2c7df7..bfeade7ce4878c90b0ad178d211a0a5123ef1b0a 100644 (file)
@@ -1093,6 +1093,9 @@ struct anv_device {
     uint64_t                                    vma_lo_available;
     uint64_t                                    vma_hi_available;
 
+    /** List of all anv_device_memory objects */
+    struct list_head                            memory_objects;
+
     struct anv_bo_pool                          batch_bo_pool;
 
     struct anv_bo_cache                         bo_cache;
@@ -1106,12 +1109,6 @@ struct anv_device {
     struct anv_bo                               trivial_batch_bo;
     struct anv_bo                               hiz_clear_bo;
 
-    /* Set of pointers to anv_buffer objects for all pinned buffers.  Pinned
-     * buffers are always resident because they could be used at any time via
-     * VK_EXT_buffer_device_address.
-     */
-    struct set *                                pinned_buffers;
-
     struct anv_pipeline_cache                   default_pipeline_cache;
     struct blorp_context                        blorp;
 
@@ -1483,6 +1480,8 @@ _anv_combine_address(struct anv_batch *batch, void *location,
 #define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
 
 struct anv_device_memory {
+   struct list_head                             link;
+
    struct anv_bo *                              bo;
    struct anv_memory_type *                     type;
    VkDeviceSize                                 map_size;
index b6d935c690180dab7ebd2fd7bdceeffbf4865d80..a6d061e7ca02c8f00d1903ecadabf873aec9cfe7 100644 (file)
@@ -2045,6 +2045,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
    if (bt_state->map == NULL)
       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 
+   /* We only need to emit relocs if we're not using softpin.  If we are using
+    * softpin then we always keep all user-allocated memory objects resident.
+    */
+   const bool need_client_mem_relocs =
+      !cmd_buffer->device->instance->physicalDevice.use_softpin;
+
    /* We only use push constant space for images before gen9 */
    if (map->image_param_count > 0) {
       VkResult result =
@@ -2122,8 +2128,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                        cmd_buffer->state.compute.num_workgroups,
                                        12, 1);
          bt_map[s] = surface_state.offset + state_offset;
-         add_surface_reloc(cmd_buffer, surface_state,
-                           cmd_buffer->state.compute.num_workgroups);
+         if (need_client_mem_relocs) {
+            add_surface_reloc(cmd_buffer, surface_state,
+                              cmd_buffer->state.compute.num_workgroups);
+         }
          continue;
       } else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
          /* This is a descriptor set buffer so the set index is actually
@@ -2155,7 +2163,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
          surface_state = sstate.state;
          assert(surface_state.alloc_size);
-         add_surface_state_relocs(cmd_buffer, sstate);
+         if (need_client_mem_relocs)
+            add_surface_state_relocs(cmd_buffer, sstate);
          break;
       }
       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
@@ -2170,7 +2179,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
             surface_state = sstate.state;
             assert(surface_state.alloc_size);
-            add_surface_state_relocs(cmd_buffer, sstate);
+            if (need_client_mem_relocs)
+               add_surface_state_relocs(cmd_buffer, sstate);
          } else {
             /* For color input attachments, we create the surface state at
              * vkBeginRenderPass time so that we can include aux and clear
@@ -2189,7 +2199,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             : desc->image_view->planes[binding->plane].storage_surface_state;
          surface_state = sstate.state;
          assert(surface_state.alloc_size);
-         add_surface_state_relocs(cmd_buffer, sstate);
+         if (need_client_mem_relocs)
+            add_surface_state_relocs(cmd_buffer, sstate);
          if (devinfo->gen < 9) {
             /* We only need the image params on gen8 and earlier.  No image
              * workarounds that require tiling information are required on
@@ -2210,8 +2221,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
          surface_state = desc->buffer_view->surface_state;
          assert(surface_state.alloc_size);
-         add_surface_reloc(cmd_buffer, surface_state,
-                           desc->buffer_view->address);
+         if (need_client_mem_relocs) {
+            add_surface_reloc(cmd_buffer, surface_state,
+                              desc->buffer_view->address);
+         }
          break;
 
       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -2235,7 +2248,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
          anv_fill_buffer_surface_state(cmd_buffer->device, surface_state,
                                        format, address, range, 1);
-         add_surface_reloc(cmd_buffer, surface_state, address);
+         if (need_client_mem_relocs)
+            add_surface_reloc(cmd_buffer, surface_state, address);
          break;
       }
 
@@ -2244,8 +2258,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
             ? desc->buffer_view->writeonly_storage_surface_state
             : desc->buffer_view->storage_surface_state;
          assert(surface_state.alloc_size);
-         add_surface_reloc(cmd_buffer, surface_state,
-                           desc->buffer_view->address);
+         if (need_client_mem_relocs) {
+            add_surface_reloc(cmd_buffer, surface_state,
+                              desc->buffer_view->address);
+         }
          if (devinfo->gen < 9) {
             assert(image < MAX_GEN8_IMAGES);
             struct brw_image_param *image_param =