On Broadwell and later, we have to use different MOCS settings to allow
the kernel to take over and disable caching when needed for external
buffers. On Broadwell, this is especially important because the kernel
can't disable eLLC, so we have to do it in userspace. We very much don't
want to disable eLLC caching for every buffer, so we need separate MOCS
settings for external and internal BOs.
In order to do this, we add an anv-specific BO flag for "external" and
use that to distinguish between buffers which may be shared with other
processes and/or the display and those which are entirely internal. That,
together with an anv_mocs_for_bo helper, lets us choose the right MOCS
settings for each BO use.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99507
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
12 files changed:
(EXEC_OBJECT_WRITE | \
EXEC_OBJECT_ASYNC | \
EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
(EXEC_OBJECT_WRITE | \
EXEC_OBJECT_ASYNC | \
EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
+ EXEC_OBJECT_PINNED | \
+ ANV_BO_EXTERNAL)
VkResult
anv_bo_cache_alloc(struct anv_device *device,
VkResult
anv_bo_cache_alloc(struct anv_device *device,
struct anv_bo **bo_out)
{
assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
struct anv_bo **bo_out)
{
assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
+ assert(bo_flags & ANV_BO_EXTERNAL);
pthread_mutex_lock(&cache->mutex);
pthread_mutex_lock(&cache->mutex);
* client has imported a BO twice in different ways and they get what
* they have coming.
*/
* client has imported a BO twice in different ways and they get what
* they have coming.
*/
- uint64_t new_flags = 0;
+ uint64_t new_flags = ANV_BO_EXTERNAL;
new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
+ /* This BO must have been flagged external in order for us to be able
+ * to export it. This is done based on external options passed into
+ * anv_AllocateMemory.
+ */
+ assert(bo->bo.flags & ANV_BO_EXTERNAL);
+
int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
if (fd < 0)
return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
if (fd < 0)
return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
obj->relocs_ptr = 0;
obj->alignment = 0;
obj->offset = bo->offset;
obj->relocs_ptr = 0;
obj->alignment = 0;
obj->offset = bo->offset;
- obj->flags = bo->flags | extra_flags;
+ obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
obj->rsvd1 = 0;
obj->rsvd2 = 0;
}
obj->rsvd1 = 0;
obj->rsvd2 = 0;
}
.addr = {
.buffer = buffer->address.bo,
.offset = buffer->address.offset + offset,
.addr = {
.buffer = buffer->address.bo,
.offset = buffer->address.offset + offset,
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device, buffer->address.bo),
.addr = {
.buffer = image->planes[plane].address.bo,
.offset = image->planes[plane].address.offset + surface->offset,
.addr = {
.buffer = image->planes[plane].address.bo,
.offset = image->planes[plane].address.offset + surface->offset,
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
blorp_surf->aux_addr = (struct blorp_address) {
.buffer = image->planes[plane].address.bo,
.offset = image->planes[plane].address.offset + aux_surface->offset,
blorp_surf->aux_addr = (struct blorp_address) {
.buffer = image->planes[plane].address.bo,
.offset = image->planes[plane].address.offset + aux_surface->offset,
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
};
blorp_surf->aux_usage = aux_usage;
};
blorp_surf->aux_usage = aux_usage;
struct blorp_address src = {
.buffer = src_buffer->address.bo,
.offset = src_buffer->address.offset + pRegions[r].srcOffset,
struct blorp_address src = {
.buffer = src_buffer->address.bo,
.offset = src_buffer->address.offset + pRegions[r].srcOffset,
- .mocs = cmd_buffer->device->default_mocs,
+ .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
};
struct blorp_address dst = {
.buffer = dst_buffer->address.bo,
.offset = dst_buffer->address.offset + pRegions[r].dstOffset,
};
struct blorp_address dst = {
.buffer = dst_buffer->address.bo,
.offset = dst_buffer->address.offset + pRegions[r].dstOffset,
- .mocs = cmd_buffer->device->default_mocs,
+ .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
};
blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
};
blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
struct blorp_address dst = {
.buffer = dst_buffer->address.bo,
.offset = dst_buffer->address.offset + dstOffset,
struct blorp_address dst = {
.buffer = dst_buffer->address.bo,
.offset = dst_buffer->address.offset + dstOffset,
- .mocs = cmd_buffer->device->default_mocs,
+ .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
};
blorp_buffer_copy(&batch, src, dst, copy_size);
};
blorp_buffer_copy(&batch, src, dst, copy_size);
.buffer = image->planes[0].address.bo,
.offset = image->planes[0].address.offset +
image->planes[0].shadow_surface.offset,
.buffer = image->planes[0].address.bo,
.offset = image->planes[0].address.offset +
image->planes[0].shadow_surface.offset,
- .mocs = cmd_buffer->device->default_mocs,
+ .mocs = anv_mocs_for_bo(cmd_buffer->device,
+ image->planes[0].address.bo),
fd_info->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
fd_info->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
- result = anv_bo_cache_import(device, &device->bo_cache,
- fd_info->fd, bo_flags, &mem->bo);
+ result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
+ bo_flags | ANV_BO_EXTERNAL, &mem->bo);
if (result != VK_SUCCESS)
goto fail;
if (result != VK_SUCCESS)
goto fail;
*/
close(fd_info->fd);
} else {
*/
close(fd_info->fd);
} else {
+ const VkExportMemoryAllocateInfoKHR *fd_info =
+ vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
+ if (fd_info && fd_info->handleTypes)
+ bo_flags |= ANV_BO_EXTERNAL;
+
result = anv_bo_cache_alloc(device, &device->bo_cache,
pAllocateInfo->allocationSize, bo_flags,
&mem->bo);
result = anv_bo_cache_alloc(device, &device->bo_cache,
pAllocateInfo->allocationSize, bo_flags,
&mem->bo);
.size_B = surface->isl.size_B,
.format = ISL_FORMAT_RAW,
.stride_B = 1,
.size_B = surface->isl.size_B,
.format = ISL_FORMAT_RAW,
.stride_B = 1,
- .mocs = device->default_mocs);
+ .mocs = anv_mocs_for_bo(device, address.bo));
state_inout->address = address,
state_inout->aux_address = ANV_NULL_ADDRESS;
state_inout->clear_address = ANV_NULL_ADDRESS;
state_inout->address = address,
state_inout->aux_address = ANV_NULL_ADDRESS;
state_inout->clear_address = ANV_NULL_ADDRESS;
.aux_address = anv_address_physical(aux_address),
.clear_address = anv_address_physical(clear_address),
.use_clear_address = !anv_address_is_null(clear_address),
.aux_address = anv_address_physical(aux_address),
.clear_address = anv_address_physical(clear_address),
.use_clear_address = !anv_address_is_null(clear_address),
- .mocs = device->default_mocs,
+ .mocs = anv_mocs_for_bo(device,
+ state_inout->address.bo),
.x_offset_sa = tile_x_sa,
.y_offset_sa = tile_y_sa);
.x_offset_sa = tile_x_sa,
.y_offset_sa = tile_y_sa);
image = anv_image_from_handle(image_h);
image = anv_image_from_handle(image_h);
+ uint64_t bo_flags = ANV_BO_EXTERNAL;
if (device->instance->physicalDevice.supports_48bit_addresses)
bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
if (device->instance->physicalDevice.use_softpin)
if (device->instance->physicalDevice.supports_48bit_addresses)
bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
if (device->instance->physicalDevice.use_softpin)
return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
}
return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
}
+/* ANV-private BO flags.  These live alongside the EXEC_OBJECT_* bits in
+ * anv_bo::flags but are never passed to the kernel; ANV_BO_FLAG_MASK is
+ * the union of all of them so they can be stripped in one operation.
+ */
+#define ANV_BO_EXTERNAL    (1ull << 31)
+#define ANV_BO_FLAG_MASK   (ANV_BO_EXTERNAL)
+
struct anv_bo {
uint32_t gem_handle;
struct anv_bo {
uint32_t gem_handle;
struct anv_scratch_pool scratch_pool;
uint32_t default_mocs;
struct anv_scratch_pool scratch_pool;
uint32_t default_mocs;
+ uint32_t external_mocs;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
anv_state_pool_free(anv_binding_table_pool(device), state);
}
anv_state_pool_free(anv_binding_table_pool(device), state);
}
+/* Return the MOCS value to use for accesses to the given BO: the device's
+ * external_mocs when the BO carries the ANV_BO_EXTERNAL flag, and its
+ * default_mocs otherwise.
+ */
+static inline uint32_t
+anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
+{
+   return (bo->flags & ANV_BO_EXTERNAL) ? device->external_mocs
+                                        : device->default_mocs;
+}
+
static void inline
anv_state_flush(struct anv_device *device, struct anv_state state)
{
static void inline
anv_state_flush(struct anv_device *device, struct anv_state state)
{
+/* Gen8 MEMORY_OBJECT_CONTROL_STATE used for external BOs: the LLC/eLLC
+ * cacheability control is set to UCwithFenceifcoherentcycle and the target
+ * cache to L3DefertoPATforLLCeLLCselection.
+ */
+#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) {      \
+      .TargetCache = L3DefertoPATforLLCeLLCselection,                      \
+      .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,  \
+      .AgeforQUADLRU = 0                                                   \
+   }
+
/* Skylake: MOCS is now an index into an array of 62 different caching
* configurations programmed by the kernel.
*/
/* Skylake: MOCS is now an index into an array of 62 different caching
* configurations programmed by the kernel.
*/
.IndextoMOCSTables = 2 \
}
.IndextoMOCSTables = 2 \
}
-#define GEN9_MOCS_PTE { \
- /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
- .IndextoMOCSTables = 1 \
+#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \
+ /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
+ .IndextoMOCSTables = 1 \
}
/* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
}
/* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
.IndextoMOCSTables = 2 \
}
.IndextoMOCSTables = 2 \
}
-#define GEN10_MOCS_PTE { \
- /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
- .IndextoMOCSTables = 1 \
+#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \
+ /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
+ .IndextoMOCSTables = 1 \
}
/* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
}
/* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
.IndextoMOCSTables = 2 \
}
.IndextoMOCSTables = 2 \
}
-#define GEN11_MOCS_PTE { \
- /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
- .IndextoMOCSTables = 1 \
+#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \
+ /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \
+ .IndextoMOCSTables = 1 \
}
struct anv_device_memory {
}
struct anv_device_memory {
ib.CutIndexEnable = pipeline->primitive_restart;
#endif
ib.IndexFormat = cmd_buffer->state.gfx.gen7.index_type;
ib.CutIndexEnable = pipeline->primitive_restart;
#endif
ib.IndexFormat = cmd_buffer->state.gfx.gen7.index_type;
- ib.MemoryObjectControlState = GENX(MOCS);
+ ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
+ buffer->address.bo);
ib.BufferStartingAddress = anv_address_add(buffer->address,
offset);
ib.BufferStartingAddress = anv_address_add(buffer->address,
offset);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
ib.IndexFormat = vk_to_gen_index_type[indexType];
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
ib.IndexFormat = vk_to_gen_index_type[indexType];
- ib.MemoryObjectControlState = GENX(MOCS);
+ ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
+ buffer->address.bo);
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferSize = buffer->size - offset;
}
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferSize = buffer->size - offset;
}
struct GENX(VERTEX_BUFFER_STATE) state = {
.VertexBufferIndex = vb,
struct GENX(VERTEX_BUFFER_STATE) state = {
.VertexBufferIndex = vb,
-#if GEN_GEN >= 8
- .MemoryObjectControlState = GENX(MOCS),
-#else
+ .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
+ buffer->address.bo),
+#if GEN_GEN <= 7
.BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
.InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
.BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
.InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
- .VertexBufferMemoryObjectControlState = GENX(MOCS),
#endif
.AddressModifyEnable = true,
#endif
.AddressModifyEnable = true,
.VertexBufferIndex = index,
.AddressModifyEnable = true,
.BufferPitch = 0,
.VertexBufferIndex = index,
.AddressModifyEnable = true,
.BufferPitch = 0,
+ .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
- .MemoryObjectControlState = GENX(MOCS),
.BufferStartingAddress = addr,
.BufferSize = size
#else
.BufferStartingAddress = addr,
.BufferSize = size
#else
- .VertexBufferMemoryObjectControlState = GENX(MOCS),
.BufferStartingAddress = addr,
.EndAddress = anv_address_add(addr, size),
#endif
.BufferStartingAddress = addr,
.EndAddress = anv_address_add(addr, size),
#endif
- struct isl_depth_stencil_hiz_emit_info info = {
- .mocs = device->default_mocs,
- };
+ struct isl_depth_stencil_hiz_emit_info info = { };
if (iview)
info.view = &iview->planes[0].isl;
if (iview)
info.view = &iview->planes[0].isl;
image->planes[depth_plane].address.bo,
image->planes[depth_plane].address.offset +
surface->offset);
image->planes[depth_plane].address.bo,
image->planes[depth_plane].address.offset +
surface->offset);
+ info.mocs =
+ anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
const uint32_t ds =
cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
const uint32_t ds =
cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
image->planes[stencil_plane].address.bo,
image->planes[stencil_plane].address.offset +
surface->offset);
image->planes[stencil_plane].address.bo,
image->planes[stencil_plane].address.offset +
surface->offset);
+ info.mocs =
+ anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
}
isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
}
isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
.AddressModifyEnable = true,
.BufferStartingAddress = src,
.BufferPitch = bs,
.AddressModifyEnable = true,
.BufferStartingAddress = src,
.BufferPitch = bs,
+ .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo),
- .MemoryObjectControlState = GENX(MOCS),
.BufferSize = size,
#else
.BufferSize = size,
#else
- .VertexBufferMemoryObjectControlState = GENX(MOCS),
.EndAddress = anv_address_add(src, size - 1),
#endif
});
.EndAddress = anv_address_add(src, size - 1),
#endif
});
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = 0;
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = 0;
- sob.SOBufferObjectControlState = GENX(MOCS);
+ sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo),
sob.SurfaceBaseAddress = dst;
#if GEN_GEN >= 8
sob.SurfaceBaseAddress = dst;
#if GEN_GEN >= 8
{
GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
&GENX(MOCS));
{
GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
&GENX(MOCS));
+#if GEN_GEN >= 8
+ GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
+ &GENX(EXTERNAL_MOCS));
+#else
+ device->external_mocs = device->default_mocs;
+#endif