radv: do not expose GTT as device local memory mostly for APUs
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 22 Apr 2020 14:54:28 +0000 (16:54 +0200)
committer Marge Bot <eric+marge@anholt.net>
Mon, 27 Apr 2020 22:41:41 +0000 (22:41 +0000)
On APUs, memory is unified (all heaps are equally fast), so apps
should count all memory heaps together. However, some games, such as
the id Tech games (Youngblood and such), don't manage memory correctly
on APUs: they spill everything when one VRAM heap is full.

Instead of spilling buffers, they should just allocate new buffers
in the second heap but it seems like these games are confused if
two memory heaps have the DEVICE_LOCAL_BIT set.

This is probably a first step towards better memory management on
APUs but there is still some work to do if we want to run most apps
with a small dedicated VRAM (256MB or so).

This gives a huge boost for Id Tech games on APUs, and doesn't
seem to reduce Feral games performance.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4771>

src/amd/vulkan/radv_device.c

index c34674d09040e0c60d9020f0142ce07ea74b2c9f..b590a92d4d59dd5c87a0b8c410301428d22fbc74 100644 (file)
@@ -180,6 +180,15 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
                        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
                };
        }
+
+       if (device->rad_info.gart_size > 0) {
+               gart_index = device->memory_properties.memoryHeapCount++;
+               device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+                       .size = device->rad_info.gart_size,
+                       .flags = 0,
+               };
+       }
+
        if (visible_vram_size) {
                visible_vram_index = device->memory_properties.memoryHeapCount++;
                device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
@@ -187,24 +196,29 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
                        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
                };
        }
-       if (device->rad_info.gart_size > 0) {
-               gart_index = device->memory_properties.memoryHeapCount++;
-               device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
-                       .size = device->rad_info.gart_size,
-                       .flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-               };
-       }
 
        STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
        unsigned type_count = 0;
-       if (vram_index >= 0) {
-               device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
-               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
-                       .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-                       .heapIndex = vram_index,
-               };
+
+       if (device->rad_info.has_dedicated_vram) {
+               if (vram_index >= 0) {
+                       device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+                       device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                               .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                               .heapIndex = vram_index,
+                       };
+               }
+       } else {
+               if (visible_vram_index >= 0) {
+                       device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+                       device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                               .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                               .heapIndex = visible_vram_index,
+                       };
+               }
        }
-       if (gart_index >= 0 && device->rad_info.has_dedicated_vram) {
+
+       if (gart_index >= 0) {
                device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
                device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
                        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
@@ -221,26 +235,13 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
                        .heapIndex = visible_vram_index,
                };
        }
-       if (gart_index >= 0 && !device->rad_info.has_dedicated_vram) {
-               /* Put GTT after visible VRAM for GPUs without dedicated VRAM
-                * as they have identical property flags, and according to the
-                * spec, for types with identical flags, the one with greater
-                * performance must be given a lower index. */
-               device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
-               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
-                       .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-                       .heapIndex = gart_index,
-               };
-       }
+
        if (gart_index >= 0) {
                device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
                device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
                        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-                       VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
-                       (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+                       VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                        .heapIndex = gart_index,
                };
        }