radv: implement VK_AMD_device_coherent_memory
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 13 Nov 2019 07:58:37 +0000 (08:58 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 18 Nov 2019 08:20:19 +0000 (08:20 +0000)
This extension adds the device coherent and device uncached memory
types. It's known to be slower than non-device coherent memory but
it might be useful for debugging.

This is only exposed for chips that support L2 uncached.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_extensions.py
src/amd/vulkan/radv_private.h

index b561980c123ce4914d6a6b836601f7ec4cbd76fd..d368fb765f7a108c7c90a0288679926e251dd6d7 100644 (file)
@@ -129,6 +129,42 @@ radv_get_vram_size(struct radv_physical_device *device)
        return device->rad_info.vram_size - radv_get_visible_vram_size(device);
 }
 
+/* Tell whether a RADV memory type is plain VRAM, i.e. RADV_MEM_TYPE_VRAM
+ * or its RADV_MEM_TYPE_VRAM_UNCACHED counterpart.
+ *
+ * The allocation path requests RADEON_FLAG_NO_CPU_ACCESS for these types
+ * and the memory budget code accounts them against RADEON_ALLOCATED_VRAM.
+ */
+static bool
+radv_is_mem_type_vram(enum radv_mem_type type)
+{
+       switch (type) {
+       case RADV_MEM_TYPE_VRAM:
+       case RADV_MEM_TYPE_VRAM_UNCACHED:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Tell whether a RADV memory type is CPU-accessible VRAM, i.e.
+ * RADV_MEM_TYPE_VRAM_CPU_ACCESS or its
+ * RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED counterpart.
+ *
+ * The memory budget code accounts these types against
+ * RADEON_ALLOCATED_VRAM_VIS.
+ */
+static bool
+radv_is_mem_type_vram_visible(enum radv_mem_type type)
+{
+       switch (type) {
+       case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
+       case RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Tell whether a RADV memory type is write-combined GTT, i.e.
+ * RADV_MEM_TYPE_GTT_WRITE_COMBINE or its
+ * RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED counterpart.
+ *
+ * The allocation path places these types in RADEON_DOMAIN_GTT and sets
+ * RADEON_FLAG_GTT_WC for them.
+ */
+static bool
+radv_is_mem_type_gtt_wc(enum radv_mem_type type)
+{
+       switch (type) {
+       case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
+       case RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Tell whether a RADV memory type is cached GTT, i.e.
+ * RADV_MEM_TYPE_GTT_CACHED or its
+ * RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED counterpart.
+ *
+ * The allocation path places these types in RADEON_DOMAIN_GTT, and they
+ * are the types accepted for host pointer imports.
+ */
+static bool
+radv_is_mem_type_gtt_cached(enum radv_mem_type type)
+{
+       switch (type) {
+       case RADV_MEM_TYPE_GTT_CACHED:
+       case RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Tell whether a RADV memory type is one of the four *_UNCACHED variants.
+ *
+ * These are the types advertised with the
+ * VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD and
+ * VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD property flags; the
+ * allocation path sets RADEON_FLAG_VA_UNCACHED for them.
+ */
+static bool
+radv_is_mem_type_uncached(enum radv_mem_type type)
+{
+       switch (type) {
+       case RADV_MEM_TYPE_VRAM_UNCACHED:
+       case RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED:
+       case RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED:
+       case RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED:
+               return true;
+       default:
+               return false;
+       }
+}
+
 static void
 radv_physical_device_init_mem_types(struct radv_physical_device *device)
 {
@@ -209,6 +245,46 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
                };
        }
        device->memory_properties.memoryTypeCount = type_count;
+
+       if (device->rad_info.has_l2_uncached) {
+               for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
+                       VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
+
+                       if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
+                           mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+                               enum radv_mem_type mem_type_id;
+
+                               switch (device->mem_type_indices[i]) {
+                               case RADV_MEM_TYPE_VRAM:
+                                       mem_type_id = RADV_MEM_TYPE_VRAM_UNCACHED;
+                                       break;
+                               case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
+                                       mem_type_id = RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED;
+                                       break;
+                               case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
+                                       mem_type_id = RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED;
+                                       break;
+                               case RADV_MEM_TYPE_GTT_CACHED:
+                                       mem_type_id = RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED;
+                                       break;
+                               default:
+                                       unreachable("invalid memory type");
+                               }
+
+                               VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
+                                       VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
+                                       VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
+
+                               device->mem_type_indices[type_count] = mem_type_id;
+                               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                                       .propertyFlags = property_flags,
+                                       .heapIndex = mem_type.heapIndex,
+                               };
+                       }
+               }
+               device->memory_properties.memoryTypeCount = type_count;
+       }
 }
 
 static void
@@ -1095,6 +1171,12 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->computeFullSubgroups = true;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
+                       VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
+                               (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
+                       features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
+                       break;
+               }
                default:
                        break;
                }
@@ -1726,8 +1808,7 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
        for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
                uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;
 
-               switch (device->mem_type_indices[i]) {
-               case RADV_MEM_TYPE_VRAM:
+               if (radv_is_mem_type_vram(device->mem_type_indices[i])) {
                        heap_usage = device->ws->query_value(device->ws,
                                                             RADEON_ALLOCATED_VRAM);
 
@@ -1737,8 +1818,7 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
 
                        memoryBudget->heapBudget[heap_index] = heap_budget;
                        memoryBudget->heapUsage[heap_index] = heap_usage;
-                       break;
-               case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
+               } else if (radv_is_mem_type_vram_visible(device->mem_type_indices[i])) {
                        heap_usage = device->ws->query_value(device->ws,
                                                             RADEON_ALLOCATED_VRAM_VIS);
 
@@ -1748,8 +1828,7 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
 
                        memoryBudget->heapBudget[heap_index] = heap_budget;
                        memoryBudget->heapUsage[heap_index] = heap_usage;
-                       break;
-               case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
+               } else if (radv_is_mem_type_gtt_wc(device->mem_type_indices[i])) {
                        heap_usage = device->ws->query_value(device->ws,
                                                             RADEON_ALLOCATED_GTT);
 
@@ -1759,9 +1838,6 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
 
                        memoryBudget->heapBudget[heap_index] = heap_budget;
                        memoryBudget->heapUsage[heap_index] = heap_usage;
-                       break;
-               default:
-                       break;
                }
        }
 
@@ -1803,7 +1879,7 @@ VkResult radv_GetMemoryHostPointerPropertiesEXT(
                const struct radv_physical_device *physical_device = device->physical_device;
                uint32_t memoryTypeBits = 0;
                for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
-                       if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
+                       if (radv_is_mem_type_gtt_cached(physical_device->mem_type_indices[i])) {
                                memoryTypeBits = (1 << i);
                                break;
                        }
@@ -4499,7 +4575,7 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                }
        } else if (host_ptr_info) {
                assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
-               assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
+               assert(radv_is_mem_type_gtt_cached(mem_type_index));
                mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
                                                      pAllocateInfo->allocationSize,
                                                      priority);
@@ -4511,18 +4587,18 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                }
        } else {
                uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
-               if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
-                   mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
+               if (radv_is_mem_type_gtt_wc(mem_type_index) ||
+                   radv_is_mem_type_gtt_cached(mem_type_index))
                        domain = RADEON_DOMAIN_GTT;
                else
                        domain = RADEON_DOMAIN_VRAM;
 
-               if (mem_type_index == RADV_MEM_TYPE_VRAM)
+               if (radv_is_mem_type_vram(mem_type_index))
                        flags |= RADEON_FLAG_NO_CPU_ACCESS;
                else
                        flags |= RADEON_FLAG_CPU_ACCESS;
 
-               if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+               if (radv_is_mem_type_gtt_wc(mem_type_index))
                        flags |= RADEON_FLAG_GTT_WC;
 
                if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
@@ -4532,6 +4608,11 @@ static VkResult radv_alloc_memory(struct radv_device *device,
                        }
                }
 
+               if (radv_is_mem_type_uncached(mem_type_index)) {
+                       assert(device->physical_device->rad_info.has_l2_uncached);
+                       flags |= RADEON_FLAG_VA_UNCACHED;
+               }
+
                mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
                                                    domain, flags, priority);
 
index a4983ba0f6156d88616c924aae65a53c8e14d24b..fab36bf53d7955e77aee58112a63d01d7b746661 100644 (file)
@@ -144,6 +144,7 @@ EXTENSIONS = [
     Extension('VK_EXT_vertex_attribute_divisor',          3, True),
     Extension('VK_EXT_ycbcr_image_arrays',                1, True),
     Extension('VK_AMD_buffer_marker',                     1, True),
+    Extension('VK_AMD_device_coherent_memory',            1, True),
     Extension('VK_AMD_draw_indirect_count',               1, True),
     Extension('VK_AMD_gcn_shader',                        1, True),
     Extension('VK_AMD_gpu_shader_half_float',             1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
index 4bfcef18589b754fc1c7ee83986a786e76758971..ea434ec16f89df0c029f0fe6e8672ce247cc0737 100644 (file)
@@ -121,6 +121,10 @@ enum radv_mem_type {
        RADV_MEM_TYPE_GTT_WRITE_COMBINE,
        RADV_MEM_TYPE_VRAM_CPU_ACCESS,
        RADV_MEM_TYPE_GTT_CACHED,
+       RADV_MEM_TYPE_VRAM_UNCACHED,
+       RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED,
+       RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED,
+       RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED,
        RADV_MEM_TYPE_COUNT
 };