anv: Use separate MOCS settings for external BOs
authorJason Ekstrand <jason.ekstrand@intel.com>
Mon, 9 Jul 2018 21:21:33 +0000 (14:21 -0700)
committerJason Ekstrand <jason.ekstrand@intel.com>
Wed, 3 Oct 2018 14:03:03 +0000 (09:03 -0500)
On Broadwell and above, we have to use different MOCS settings to allow
the kernel to take over and disable caching when needed for external
buffers.  On Broadwell, this is especially important because the kernel
can't disable eLLC so we have to do it in userspace.  We very badly
don't want to do that on everything so we need separate MOCS for
external and internal BOs.

In order to do this, we add an anv-specific BO flag for "external" and
use that to distinguish between buffers which may be shared with other
processes and/or display and those which are entirely internal.  That,
together with an anv_mocs_for_bo helper lets us choose the right MOCS
settings for each BO use.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
12 files changed:
src/intel/vulkan/anv_allocator.c
src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_blorp.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_image.c
src/intel/vulkan/anv_intel.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/gen7_cmd_buffer.c
src/intel/vulkan/gen8_cmd_buffer.c
src/intel/vulkan/genX_cmd_buffer.c
src/intel/vulkan/genX_gpu_memcpy.c
src/intel/vulkan/genX_state.c

index ab01d46cbebdec86b2279fd9a5979ecf45ebb960..f62d48ae3fe37af626c4e17a3e6091225f5ba74d 100644 (file)
@@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t gem_handle)
    (EXEC_OBJECT_WRITE | \
     EXEC_OBJECT_ASYNC | \
     EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
-    EXEC_OBJECT_PINNED)
+    EXEC_OBJECT_PINNED | \
+    ANV_BO_EXTERNAL)
 
 VkResult
 anv_bo_cache_alloc(struct anv_device *device,
@@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
                     struct anv_bo **bo_out)
 {
    assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
+   assert(bo_flags & ANV_BO_EXTERNAL);
 
    pthread_mutex_lock(&cache->mutex);
 
@@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
        * client has imported a BO twice in different ways and they get what
        * they have coming.
        */
-      uint64_t new_flags = 0;
+      uint64_t new_flags = ANV_BO_EXTERNAL;
       new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
       new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
       new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
@@ -1411,6 +1413,12 @@ anv_bo_cache_export(struct anv_device *device,
    assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
    struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
 
+   /* This BO must have been flagged external in order for us to be able
+    * to export it.  This is done based on external options passed into
+    * anv_AllocateMemory.
+    */
+   assert(bo->bo.flags & ANV_BO_EXTERNAL);
+
    int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
    if (fd < 0)
       return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
index 0f7c8325ea4085f46ae70bbadb303146941decfe..3e13553ac18cd6e39be1a0755fe0d77cb56e7f00 100644 (file)
@@ -1088,7 +1088,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
       obj->relocs_ptr = 0;
       obj->alignment = 0;
       obj->offset = bo->offset;
-      obj->flags = bo->flags | extra_flags;
+      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
       obj->rsvd1 = 0;
       obj->rsvd2 = 0;
    }
index a1c359cf461eb52097bc4cb4ff10f3ad9e7c2f06..478b8e7a3db4ee90cfcdfc925e1fa7dc28f3900f 100644 (file)
@@ -155,7 +155,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
       .addr = {
          .buffer = buffer->address.bo,
          .offset = buffer->address.offset + offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, buffer->address.bo),
       },
    };
 
@@ -208,7 +208,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
       .addr = {
          .buffer = image->planes[plane].address.bo,
          .offset = image->planes[plane].address.offset + surface->offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
       },
    };
 
@@ -218,7 +218,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
       blorp_surf->aux_addr = (struct blorp_address) {
          .buffer = image->planes[plane].address.bo,
          .offset = image->planes[plane].address.offset + aux_surface->offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
       };
       blorp_surf->aux_usage = aux_usage;
 
@@ -668,12 +668,12 @@ void anv_CmdCopyBuffer(
       struct blorp_address src = {
          .buffer = src_buffer->address.bo,
          .offset = src_buffer->address.offset + pRegions[r].srcOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
       };
       struct blorp_address dst = {
          .buffer = dst_buffer->address.bo,
          .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
       };
 
       blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
@@ -726,7 +726,7 @@ void anv_CmdUpdateBuffer(
       struct blorp_address dst = {
          .buffer = dst_buffer->address.bo,
          .offset = dst_buffer->address.offset + dstOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
       };
 
       blorp_buffer_copy(&batch, src, dst, copy_size);
@@ -1436,7 +1436,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
          .buffer = image->planes[0].address.bo,
          .offset = image->planes[0].address.offset +
                    image->planes[0].shadow_surface.offset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device,
+                                 image->planes[0].address.bo),
       },
    };
 
index 8f0cbad305fbaaa6d03e084106bb008f29b72f81..6a24d1086d895073a295c719e3ebb495ddb26c8b 100644 (file)
@@ -2255,8 +2255,8 @@ VkResult anv_AllocateMemory(
              fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
 
-      result = anv_bo_cache_import(device, &device->bo_cache,
-                                   fd_info->fd, bo_flags, &mem->bo);
+      result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
+                                   bo_flags | ANV_BO_EXTERNAL, &mem->bo);
       if (result != VK_SUCCESS)
          goto fail;
 
@@ -2293,6 +2293,11 @@ VkResult anv_AllocateMemory(
        */
       close(fd_info->fd);
    } else {
+      const VkExportMemoryAllocateInfoKHR *fd_info =
+         vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
+      if (fd_info && fd_info->handleTypes)
+         bo_flags |= ANV_BO_EXTERNAL;
+
       result = anv_bo_cache_alloc(device, &device->bo_cache,
                                   pAllocateInfo->allocationSize, bo_flags,
                                   &mem->bo);
index 9f7964ae37eba894f8e40c871fb71e3be7510351..e89ce012be7c31f2410af8b2487043193497e318 100644 (file)
@@ -1100,7 +1100,7 @@ anv_image_fill_surface_state(struct anv_device *device,
                             .size_B = surface->isl.size_B,
                             .format = ISL_FORMAT_RAW,
                             .stride_B = 1,
-                            .mocs = device->default_mocs);
+                            .mocs = anv_mocs_for_bo(device, address.bo));
       state_inout->address = address,
       state_inout->aux_address = ANV_NULL_ADDRESS;
       state_inout->clear_address = ANV_NULL_ADDRESS;
@@ -1201,7 +1201,8 @@ anv_image_fill_surface_state(struct anv_device *device,
                           .aux_address = anv_address_physical(aux_address),
                           .clear_address = anv_address_physical(clear_address),
                           .use_clear_address = !anv_address_is_null(clear_address),
-                          .mocs = device->default_mocs,
+                          .mocs = anv_mocs_for_bo(device,
+                                                  state_inout->address.bo),
                           .x_offset_sa = tile_x_sa,
                           .y_offset_sa = tile_y_sa);
 
index 06db5787a9c83778b900b226afbb3536514e4014..ed1bc096c661b6d1a1c1d95369a7401d5987c3a7 100644 (file)
@@ -73,7 +73,7 @@ VkResult anv_CreateDmaBufImageINTEL(
 
    image = anv_image_from_handle(image_h);
 
-   uint64_t bo_flags = 0;
+   uint64_t bo_flags = ANV_BO_EXTERNAL;
    if (device->instance->physicalDevice.supports_48bit_addresses)
       bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
    if (device->instance->physicalDevice.use_softpin)
index 60f40c7e2ae62bfbf9c24dda6f6e7660b16f80e7..5b4c286bf38a362fc7aa178e5438478bcd5430cf 100644 (file)
@@ -556,6 +556,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
    return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
 }
 
+/* Extra ANV-defined BO flags which won't be passed to the kernel */
+#define ANV_BO_EXTERNAL    (1ull << 31)
+#define ANV_BO_FLAG_MASK   (1ull << 31)
+
 struct anv_bo {
    uint32_t gem_handle;
 
@@ -1013,6 +1017,7 @@ struct anv_device {
     struct anv_scratch_pool                     scratch_pool;
 
     uint32_t                                    default_mocs;
+    uint32_t                                    external_mocs;
 
     pthread_mutex_t                             mutex;
     pthread_cond_t                              queue_submit;
@@ -1042,6 +1047,15 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
    anv_state_pool_free(anv_binding_table_pool(device), state);
 }
 
+static inline uint32_t
+anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
+{
+   if (bo->flags & ANV_BO_EXTERNAL)
+      return device->external_mocs;
+   else
+      return device->default_mocs;
+}
+
 static void inline
 anv_state_flush(struct anv_device *device, struct anv_state state)
 {
@@ -1323,6 +1337,12 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .AgeforQUADLRU = 0                                       \
    }
 
+#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) {     \
+      .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,  \
+      .TargetCache = L3DefertoPATforLLCeLLCselection,                      \
+      .AgeforQUADLRU = 0                                                   \
+   }
+
 /* Skylake: MOCS is now an index into an array of 62 different caching
  * configurations programmed by the kernel.
  */
@@ -1332,9 +1352,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .IndextoMOCSTables                           = 2         \
    }
 
-#define GEN9_MOCS_PTE {                                 \
-      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
-      .IndextoMOCSTables                           = 1  \
+#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) {  \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                       \
+      .IndextoMOCSTables                           = 1                  \
    }
 
 /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
@@ -1343,9 +1363,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .IndextoMOCSTables                           = 2         \
    }
 
-#define GEN10_MOCS_PTE {                                 \
-      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
-      .IndextoMOCSTables                           = 1  \
+#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) {   \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                          \
+      .IndextoMOCSTables                           = 1                     \
    }
 
 /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
@@ -1354,9 +1374,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .IndextoMOCSTables                           = 2         \
    }
 
-#define GEN11_MOCS_PTE {                                 \
-      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
-      .IndextoMOCSTables                           = 1  \
+#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) {   \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                          \
+      .IndextoMOCSTables                           = 1                     \
    }
 
 struct anv_device_memory {
index cf1f8ee282954f68711fb8113fa881223a6a1632..da51cb9781cf6eaea7c6d3633b4a3a913606adca 100644 (file)
@@ -246,7 +246,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
          ib.CutIndexEnable             = pipeline->primitive_restart;
 #endif
          ib.IndexFormat                = cmd_buffer->state.gfx.gen7.index_type;
-         ib.MemoryObjectControlState   = GENX(MOCS);
+         ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
+                                                         buffer->address.bo);
 
          ib.BufferStartingAddress      = anv_address_add(buffer->address,
                                                          offset);
index ca2baf84a19b67c943c7ca77dad16b31423e4797..752d04f30135ab55f9db909fff6e4bb6eb8232bd 100644 (file)
@@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)(
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
       ib.IndexFormat                = vk_to_gen_index_type[indexType];
-      ib.MemoryObjectControlState   = GENX(MOCS);
+      ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
+                                                      buffer->address.bo);
       ib.BufferStartingAddress      = anv_address_add(buffer->address, offset);
       ib.BufferSize                 = buffer->size - offset;
    }
index 099c30f3d66e95359a5f7dc96189eb521d8bbd98..c3a7e5c83c3c2f6daa60358e0dab3378690797a3 100644 (file)
@@ -2546,12 +2546,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
          struct GENX(VERTEX_BUFFER_STATE) state = {
             .VertexBufferIndex = vb,
 
-#if GEN_GEN >= 8
-            .MemoryObjectControlState = GENX(MOCS),
-#else
+            .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
+                                                buffer->address.bo),
+#if GEN_GEN <= 7
             .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
             .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
-            .VertexBufferMemoryObjectControlState = GENX(MOCS),
 #endif
 
             .AddressModifyEnable = true,
@@ -2666,12 +2665,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
          .VertexBufferIndex = index,
          .AddressModifyEnable = true,
          .BufferPitch = 0,
+         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
 #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
          .BufferStartingAddress = addr,
          .BufferSize = size
 #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
          .BufferStartingAddress = addr,
          .EndAddress = anv_address_add(addr, size),
 #endif
@@ -3423,9 +3421,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
    if (dw == NULL)
       return;
 
-   struct isl_depth_stencil_hiz_emit_info info = {
-      .mocs = device->default_mocs,
-   };
+   struct isl_depth_stencil_hiz_emit_info info = { };
 
    if (iview)
       info.view = &iview->planes[0].isl;
@@ -3443,6 +3439,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
                               image->planes[depth_plane].address.bo,
                               image->planes[depth_plane].address.offset +
                               surface->offset);
+      info.mocs =
+         anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
 
       const uint32_t ds =
          cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
@@ -3474,6 +3472,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
                               image->planes[stencil_plane].address.bo,
                               image->planes[stencil_plane].address.offset +
                               surface->offset);
+      info.mocs =
+         anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
    }
 
    isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
index fd78f4d125b668b2d4597d97b7918723ebaef80d..81522986550240786098087f286453720a8747e3 100644 (file)
@@ -167,11 +167,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
          .AddressModifyEnable = true,
          .BufferStartingAddress = src,
          .BufferPitch = bs,
+         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo),
 #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
          .BufferSize = size,
 #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
          .EndAddress = anv_address_add(src, size - 1),
 #endif
       });
@@ -228,7 +227,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
       sob.SOBufferIndex = 0;
-      sob.SOBufferObjectControlState = GENX(MOCS);
+      sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo),
       sob.SurfaceBaseAddress = dst;
 
 #if GEN_GEN >= 8
index aa5bce5a801a1f58c8f8cec6395b8a20f52da755..75bcd96d78a1907af388797561aef3093b986887 100644 (file)
@@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device)
 {
    GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
                                           &GENX(MOCS));
+#if GEN_GEN >= 8
+   GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
+                                          &GENX(EXTERNAL_MOCS));
+#else
+   device->external_mocs = device->default_mocs;
+#endif
 
    struct anv_batch batch;