radv: Implement VK_EXT_discard_rectangles.
[mesa.git] / src / amd / vulkan / radv_device.c
index eb2587212c8ca69a7ac19047e3edd6347adff9d0..baffa41d31694301e74b326b3685c13a86696ea3 100644 (file)
@@ -29,7 +29,9 @@
 #include <string.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include "radv_debug.h"
 #include "radv_private.h"
+#include "radv_shader.h"
 #include "radv_cs.h"
 #include "util/disk_cache.h"
 #include "util/strtod.h"
@@ -37,7 +39,6 @@
 #include <xf86drm.h>
 #include <amdgpu.h>
 #include <amdgpu_drm.h>
-#include "amdgpu_id.h"
 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
 #include "ac_llvm_util.h"
 #include "vk_format.h"
@@ -63,204 +64,123 @@ radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
 }
 
 static void
-radv_get_device_uuid(drmDevicePtr device, void *uuid) {
-       memset(uuid, 0, VK_UUID_SIZE);
-       memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
-       memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
-       memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
-       memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
-}
-
-static const VkExtensionProperties instance_extensions[] = {
-       {
-               .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
-               .specVersion = 25,
-       },
-#ifdef VK_USE_PLATFORM_XCB_KHR
-       {
-               .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
-               .specVersion = 6,
-       },
-#endif
-#ifdef VK_USE_PLATFORM_XLIB_KHR
-       {
-               .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
-               .specVersion = 6,
-       },
-#endif
-#ifdef VK_USE_PLATFORM_WAYLAND_KHR
-       {
-               .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
-               .specVersion = 6,
-       },
-#endif
-       {
-               .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-};
-
-static const VkExtensionProperties common_device_extensions[] = {
-       {
-               .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
-               .specVersion = 68,
-       },
-       {
-               .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-};
-static const VkExtensionProperties ext_sema_device_extensions[] = {
-       {
-               .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-       {
-               .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
-               .specVersion = 1,
-       },
-};
-
-static VkResult
-radv_extensions_register(struct radv_instance *instance,
-                       struct radv_extensions *extensions,
-                       const VkExtensionProperties *new_ext,
-                       uint32_t num_ext)
+radv_get_driver_uuid(void *uuid)
 {
-       size_t new_size;
-       VkExtensionProperties *new_ptr;
-
-       assert(new_ext && num_ext > 0);
-
-       if (!new_ext)
-               return VK_ERROR_INITIALIZATION_FAILED;
-
-       new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
-       new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
-                               new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
-
-       /* Old array continues to be valid, update nothing */
-       if (!new_ptr)
-               return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-       memcpy(&new_ptr[extensions->num_ext], new_ext,
-               num_ext * sizeof(VkExtensionProperties));
-       extensions->ext_array = new_ptr;
-       extensions->num_ext += num_ext;
-
-       return VK_SUCCESS;
+       ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
 }
 
 static void
-radv_extensions_finish(struct radv_instance *instance,
-                       struct radv_extensions *extensions)
+radv_get_device_uuid(struct radeon_info *info, void *uuid)
 {
-       assert(extensions);
-
-       if (!extensions)
-               radv_loge("Attemted to free invalid extension struct\n");
-
-       if (extensions->ext_array)
-               vk_free(&instance->alloc, extensions->ext_array);
+       ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
 }
 
-static bool
-is_extension_enabled(const VkExtensionProperties *extensions,
-                       size_t num_ext,
-                       const char *name)
+static void
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
 {
-       assert(extensions && name);
+       const char *chip_string;
+       char llvm_string[32] = {};
 
-       for (uint32_t i = 0; i < num_ext; i++) {
-               if (strcmp(name, extensions[i].extensionName) == 0)
-                       return true;
-       }
-
-       return false;
+       switch (family) {
+       case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
+       case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
+       case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
+       case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
+       case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
+       case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
+       case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
+       case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
+       case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
+       case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
+       case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
+       case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
+       case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
+       case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
+       case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
+       case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
+       case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
+       case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
+       case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
+       case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
+       default: chip_string = "AMD RADV unknown"; break;
+       }
+
+       if (HAVE_LLVM > 0) {
+               snprintf(llvm_string, sizeof(llvm_string),
+                        " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
+                        HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+       }
+
+       snprintf(name, name_len, "%s%s", chip_string, llvm_string);
 }
 
-static const char *
-get_chip_name(enum radeon_family family)
+static void
+radv_physical_device_init_mem_types(struct radv_physical_device *device)
 {
-       switch (family) {
-       case CHIP_TAHITI: return "AMD RADV TAHITI";
-       case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
-       case CHIP_VERDE: return "AMD RADV CAPE VERDE";
-       case CHIP_OLAND: return "AMD RADV OLAND";
-       case CHIP_HAINAN: return "AMD RADV HAINAN";
-       case CHIP_BONAIRE: return "AMD RADV BONAIRE";
-       case CHIP_KAVERI: return "AMD RADV KAVERI";
-       case CHIP_KABINI: return "AMD RADV KABINI";
-       case CHIP_HAWAII: return "AMD RADV HAWAII";
-       case CHIP_MULLINS: return "AMD RADV MULLINS";
-       case CHIP_TONGA: return "AMD RADV TONGA";
-       case CHIP_ICELAND: return "AMD RADV ICELAND";
-       case CHIP_CARRIZO: return "AMD RADV CARRIZO";
-       case CHIP_FIJI: return "AMD RADV FIJI";
-       case CHIP_POLARIS10: return "AMD RADV POLARIS10";
-       case CHIP_POLARIS11: return "AMD RADV POLARIS11";
-       case CHIP_POLARIS12: return "AMD RADV POLARIS12";
-       case CHIP_STONEY: return "AMD RADV STONEY";
-       case CHIP_VEGA10: return "AMD RADV VEGA";
-       case CHIP_RAVEN: return "AMD RADV RAVEN";
-       default: return "AMD RADV unknown";
+       STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
+       uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
+                                         device->rad_info.vram_vis_size);
+
+       int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+       device->memory_properties.memoryHeapCount = 0;
+       if (device->rad_info.vram_size - visible_vram_size > 0) {
+               vram_index = device->memory_properties.memoryHeapCount++;
+               device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
+                       .size = device->rad_info.vram_size - visible_vram_size,
+                       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+               };
        }
+       if (visible_vram_size) {
+               visible_vram_index = device->memory_properties.memoryHeapCount++;
+               device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
+                       .size = visible_vram_size,
+                       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+               };
+       }
+       if (device->rad_info.gart_size > 0) {
+               gart_index = device->memory_properties.memoryHeapCount++;
+               device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+                       .size = device->rad_info.gart_size,
+                       .flags = 0,
+               };
+       }
+
+       STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
+       unsigned type_count = 0;
+       if (vram_index >= 0) {
+               device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                       .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                       .heapIndex = vram_index,
+               };
+       }
+       if (gart_index >= 0) {
+               device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
+               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                       .heapIndex = gart_index,
+               };
+       }
+       if (visible_vram_index >= 0) {
+               device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
+               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                       .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                       .heapIndex = visible_vram_index,
+               };
+       }
+       if (gart_index >= 0) {
+               device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
+               device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+                       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                       VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+                       .heapIndex = gart_index,
+               };
+       }
+       device->memory_properties.memoryTypeCount = type_count;
 }
 
 static VkResult
@@ -275,7 +195,7 @@ radv_physical_device_init(struct radv_physical_device *device,
 
        fd = open(path, O_RDWR | O_CLOEXEC);
        if (fd < 0)
-               return VK_ERROR_INCOMPATIBLE_DRIVER;
+               return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
 
        version = drmGetVersion(fd);
        if (!version) {
@@ -305,40 +225,32 @@ radv_physical_device_init(struct radv_physical_device *device,
 
        device->local_fd = fd;
        device->ws->query_info(device->ws, &device->rad_info);
-       result = radv_init_wsi(device);
-       if (result != VK_SUCCESS) {
-               device->ws->destroy(device->ws);
-               goto fail;
-       }
+
+       radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
 
        if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
-               radv_finish_wsi(device);
                device->ws->destroy(device->ws);
                result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                                   "cannot generate UUID");
                goto fail;
        }
 
-       result = radv_extensions_register(instance,
-                                       &device->extensions,
-                                       common_device_extensions,
-                                       ARRAY_SIZE(common_device_extensions));
-       if (result != VK_SUCCESS)
-               goto fail;
+       /* These flags affect shader compilation. */
+       uint64_t shader_env_flags =
+               (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
+               (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
 
-       if (device->rad_info.has_syncobj) {
-               result = radv_extensions_register(instance,
-                                                 &device->extensions,
-                                                 ext_sema_device_extensions,
-                                                 ARRAY_SIZE(ext_sema_device_extensions));
-               if (result != VK_SUCCESS)
-                       goto fail;
-       }
+       /* The disk cache is created from the device name, the hex-encoded
+        * cache UUID and the shader compilation flags computed above.
+        */
+       char buf[VK_UUID_SIZE * 2 + 1];
+       disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
+       device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
 
        fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
-       device->name = get_chip_name(device->rad_info.family);
 
-       radv_get_device_uuid(drm_device, device->device_uuid);
+       radv_get_driver_uuid(&device->driver_uuid);
+       radv_get_device_uuid(&device->rad_info, &device->device_uuid);
 
        if (device->rad_info.family == CHIP_STONEY ||
            device->rad_info.chip_class >= GFX9) {
@@ -346,6 +258,25 @@ radv_physical_device_init(struct radv_physical_device *device,
                device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
        }
 
+       /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
+        * on SI.
+        */
+       device->has_clear_state = device->rad_info.chip_class >= CIK;
+
+       device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
+
+       /* Vega10/Raven need a special workaround for a hardware bug. */
+       device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
+                                 device->rad_info.family == CHIP_RAVEN;
+
+       radv_physical_device_init_mem_types(device);
+
+       result = radv_init_wsi(device);
+       if (result != VK_SUCCESS) {
+               device->ws->destroy(device->ws);
+               goto fail;
+       }
+
        return VK_SUCCESS;
 
 fail:
@@ -356,9 +287,9 @@ fail:
 static void
 radv_physical_device_finish(struct radv_physical_device *device)
 {
-       radv_extensions_finish(device->instance, &device->extensions);
        radv_finish_wsi(device);
        device->ws->destroy(device->ws);
+       disk_cache_destroy(device->disk_cache);
        close(device->local_fd);
 }
 
@@ -400,15 +331,54 @@ static const struct debug_control radv_debug_options[] = {
        {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
        {"allbos", RADV_DEBUG_ALL_BOS},
        {"noibs", RADV_DEBUG_NO_IBS},
+       {"spirv", RADV_DEBUG_DUMP_SPIRV},
+       {"vmfaults", RADV_DEBUG_VM_FAULTS},
+       {"zerovram", RADV_DEBUG_ZERO_VRAM},
+       {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
+       {"nosisched", RADV_DEBUG_NO_SISCHED},
        {NULL, 0}
 };
 
+const char *
+radv_get_debug_option_name(int id)
+{
+       assert(id < ARRAY_SIZE(radv_debug_options) - 1); /* -1: the final {NULL, 0} entry is a terminator, not an option */
+       return radv_debug_options[id].string; /* name stored at table index id */
+}
+
 static const struct debug_control radv_perftest_options[] = {
-       {"batchchain", RADV_PERFTEST_BATCHCHAIN},
+       {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
        {"sisched", RADV_PERFTEST_SISCHED},
+       {"localbos", RADV_PERFTEST_LOCAL_BOS},
+       {"binning", RADV_PERFTEST_BINNING},
        {NULL, 0}
 };
 
+const char *
+radv_get_perftest_option_name(int id)
+{
+       assert(id < ARRAY_SIZE(radv_perftest_options) - 1); /* bound by the table indexed below; -1 skips its {NULL, 0} terminator */
+       return radv_perftest_options[id].string; /* name stored at table index id */
+}
+
+static void
+radv_handle_per_app_options(struct radv_instance *instance,
+                           const VkApplicationInfo *info)
+{
+       /* Nothing to match against without an application name. */
+       if (info == NULL || info->pApplicationName == NULL)
+               return;
+
+       const char *app_name = info->pApplicationName;
+       const bool is_talos = strcmp(app_name, "Talos - Linux - 32bit") == 0 ||
+                             strcmp(app_name, "Talos - Linux - 64bit") == 0;
+       /* Force-enable LLVM sisched for Talos: it looks safe there and
+        * gives a few more FPS.
+        */
+       if (is_talos)
+               instance->perftest_flags |= RADV_PERFTEST_SISCHED;
+}
+
 VkResult radv_CreateInstance(
        const VkInstanceCreateInfo*                 pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
@@ -436,19 +406,16 @@ VkResult radv_CreateInstance(
        }
 
        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
-               if (!is_extension_enabled(instance_extensions,
-                                       ARRAY_SIZE(instance_extensions),
-                                       pCreateInfo->ppEnabledExtensionNames[i]))
+               const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
+               if (!radv_instance_extension_supported(ext_name))
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
        }
 
-       instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
-                              VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+       instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
        if (!instance)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-       memset(instance, 0, sizeof(*instance));
-
        instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
 
        if (pAllocator)
@@ -469,6 +436,16 @@ VkResult radv_CreateInstance(
        instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
                                                   radv_perftest_options);
 
+       radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
+
+       if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
+               /* Disable sisched when the user requests it; this is mostly
+                * useful when the driver force-enables sisched for the given
+                * application.
+                */
+               instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
+       }
+
        *pInstance = radv_instance_to_handle(instance);
 
        return VK_SUCCESS;
@@ -506,12 +483,12 @@ radv_enumerate_devices(struct radv_instance *instance)
 
        max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
        if (max_devices < 1)
-               return VK_ERROR_INCOMPATIBLE_DRIVER;
+               return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
 
        for (unsigned i = 0; i < (unsigned)max_devices; i++) {
                if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
                    devices[i]->bustype == DRM_BUS_PCI &&
-                   devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
+                   devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
 
                        result = radv_physical_device_init(instance->physicalDevices +
                                                           instance->physicalDeviceCount,
@@ -559,8 +536,6 @@ void radv_GetPhysicalDeviceFeatures(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceFeatures*                   pFeatures)
 {
-       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
-       bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
        memset(pFeatures, 0, sizeof(*pFeatures));
 
        *pFeatures = (VkPhysicalDeviceFeatures) {
@@ -568,8 +543,8 @@ void radv_GetPhysicalDeviceFeatures(
                .fullDrawIndexUint32                      = true,
                .imageCubeArray                           = true,
                .independentBlend                         = true,
-               .geometryShader                           = !is_gfx9,
-               .tessellationShader                       = !is_gfx9,
+               .geometryShader                           = true,
+               .tessellationShader                       = true,
                .sampleRateShading                        = true,
                .dualSrcBlend                             = true,
                .logicOp                                  = true,
@@ -624,6 +599,13 @@ void radv_GetPhysicalDeviceFeatures2KHR(
                        features->variablePointers = false;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
+                       VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
+                       features->multiview = true;
+                       features->multiviewGeometryShader = true;
+                       features->multiviewTessellationShader = true;
+                       break;
+               }
                default:
                        break;
                }
@@ -765,9 +747,9 @@ void radv_GetPhysicalDeviceProperties(
        };
 
        *pProperties = (VkPhysicalDeviceProperties) {
-               .apiVersion = VK_MAKE_VERSION(1, 0, 42),
+               .apiVersion = radv_physical_device_api_version(pdevice),
                .driverVersion = vk_get_driver_version(),
-               .vendorID = 0x1002,
+               .vendorID = ATI_VENDOR_ID,
                .deviceID = pdevice->rad_info.pci_id,
                .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
                .limits = limits,
@@ -795,11 +777,29 @@ void radv_GetPhysicalDeviceProperties2KHR(
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
                        VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
-                       radv_device_get_cache_uuid(0, properties->driverUUID);
+                       memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
                        memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
                        properties->deviceLUIDValid = false;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
+                       VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
+                       properties->maxMultiviewViewCount = MAX_VIEWS;
+                       properties->maxMultiviewInstanceIndex = INT_MAX;
+                       break;
+               }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
+                       VkPhysicalDevicePointClippingPropertiesKHR *properties =
+                           (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
+                       properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
+                       break;
+               }
+               case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
+                       VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
+                           (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
+                       properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
+                       break;
+               }
                default:
                        break;
                }
@@ -902,47 +902,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
 {
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
 
-       STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
-
-       pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
-       pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
-               .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-               .heapIndex = RADV_MEM_HEAP_VRAM,
-       };
-       pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
-               .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-               .heapIndex = RADV_MEM_HEAP_GTT,
-       };
-       pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
-               .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-               .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
-       };
-       pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
-               .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-               VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-               .heapIndex = RADV_MEM_HEAP_GTT,
-       };
-
-       STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
-
-       pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
-       pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
-               .size = physical_device->rad_info.vram_size -
-                               physical_device->rad_info.vram_vis_size,
-               .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-       };
-       pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
-               .size = physical_device->rad_info.vram_vis_size,
-               .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-       };
-       pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
-               .size = physical_device->rad_info.gart_size,
-               .flags = 0,
-       };
+       *pMemoryProperties = physical_device->memory_properties;
 }
 
 void radv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -953,18 +913,42 @@ void radv_GetPhysicalDeviceMemoryProperties2KHR(
                                                      &pMemoryProperties->memoryProperties);
 }
 
+static enum radeon_ctx_priority
+radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
+{
+       /* Without an explicit request the queue gets MEDIUM priority. */
+       if (pObj == NULL)
+               return RADEON_CTX_PRIORITY_MEDIUM;
+
+       /* Translate the requested Vulkan global priority into the matching
+        * radeon context priority.
+        */
+       if (pObj->globalPriority == VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT)
+               return RADEON_CTX_PRIORITY_REALTIME;
+       if (pObj->globalPriority == VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT)
+               return RADEON_CTX_PRIORITY_HIGH;
+       if (pObj->globalPriority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT)
+               return RADEON_CTX_PRIORITY_MEDIUM;
+       if (pObj->globalPriority == VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT)
+               return RADEON_CTX_PRIORITY_LOW;
+       unreachable("Illegal global priority value");
+       return RADEON_CTX_PRIORITY_INVALID;
+}
+
 static int
 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
-               int queue_family_index, int idx)
+               uint32_t queue_family_index, int idx,
+               const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
 {
        queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        queue->device = device;
        queue->queue_family_index = queue_family_index;
        queue->queue_idx = idx;
+       queue->priority = radv_get_queue_global_priority(global_priority);
 
-       queue->hw_ctx = device->ws->ctx_create(device->ws);
+       queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
        if (!queue->hw_ctx)
-               return VK_ERROR_OUT_OF_HOST_MEMORY;
+               return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
        return VK_SUCCESS;
 }
@@ -975,6 +959,8 @@ radv_queue_finish(struct radv_queue *queue)
        if (queue->hw_ctx)
                queue->device->ws->ctx_destroy(queue->hw_ctx);
 
+       if (queue->initial_full_flush_preamble_cs)
+               queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
        if (queue->initial_preamble_cs)
                queue->device->ws->cs_destroy(queue->initial_preamble_cs);
        if (queue->continue_preamble_cs)
@@ -1038,11 +1024,15 @@ VkResult radv_CreateDevice(
        VkResult result;
        struct radv_device *device;
 
+       bool keep_shader_info = false;
+
        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
-               if (!is_extension_enabled(physical_device->extensions.ext_array,
-                                       physical_device->extensions.num_ext,
-                                       pCreateInfo->ppEnabledExtensionNames[i]))
+               const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
+               if (!radv_physical_device_extension_supported(physical_device, ext_name))
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
+
+               if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
+                       keep_shader_info = true;
        }
 
        /* Check enabled features */
@@ -1058,29 +1048,32 @@ VkResult radv_CreateDevice(
                }
        }
 
-       device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
-                            sizeof(*device), 8,
-                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+       device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
+                           sizeof(*device), 8,
+                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
        if (!device)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-       memset(device, 0, sizeof(*device));
-
        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = physical_device->instance;
        device->physical_device = physical_device;
 
-       device->debug_flags = device->instance->debug_flags;
-
        device->ws = physical_device->ws;
        if (pAllocator)
                device->alloc = *pAllocator;
        else
                device->alloc = physical_device->instance->alloc;
 
+       mtx_init(&device->shader_slab_mutex, mtx_plain);
+       list_inithead(&device->shader_slabs);
+
        for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
                const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
                uint32_t qfi = queue_create->queueFamilyIndex;
+               const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
+                       vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+
+               assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
 
                device->queues[qfi] = vk_alloc(&device->alloc,
                                               queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -1094,12 +1087,19 @@ VkResult radv_CreateDevice(
                device->queue_count[qfi] = queue_create->queueCount;
 
                for (unsigned q = 0; q < queue_create->queueCount; q++) {
-                       result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
+                       result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
                        if (result != VK_SUCCESS)
                                goto fail;
                }
        }
 
+       device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
+                             (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
+
+       /* Disabled and not implemented for now. */
+       device->dfsm_allowed = device->pbb_allowed && false;
+
+
 #if HAVE_LLVM < 0x0400
        device->llvm_supports_spill = false;
 #else
@@ -1122,6 +1122,16 @@ VkResult radv_CreateDevice(
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
 
+       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
+                                    S_00B800_FORCE_START_AT_000(1);
+
+       if (device->physical_device->rad_info.chip_class >= CIK) {
+               /* If the KMD allows it (there is a KMD hw register for it),
+                * allow launching waves out-of-order.
+                */
+               device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
+       }
+
        radv_device_init_gs_info(device);
 
        device->tess_offchip_block_dw_size =
@@ -1130,6 +1140,15 @@ VkResult radv_CreateDevice(
                device->physical_device->rad_info.chip_class >= VI &&
                device->physical_device->rad_info.max_se >= 2;
 
+       if (getenv("RADV_TRACE_FILE")) {
+               keep_shader_info = true;
+
+               if (!radv_init_trace(device))
+                       goto fail;
+       }
+
+       device->keep_shader_info = keep_shader_info;
+
        result = radv_device_init_meta(device);
        if (result != VK_SUCCESS)
                goto fail;
@@ -1150,52 +1169,6 @@ VkResult radv_CreateDevice(
                        break;
                }
                device->ws->cs_finalize(device->empty_cs[family]);
-
-               device->flush_cs[family] = device->ws->cs_create(device->ws, family);
-               switch (family) {
-               case RADV_QUEUE_GENERAL:
-               case RADV_QUEUE_COMPUTE:
-                       si_cs_emit_cache_flush(device->flush_cs[family],
-                                              false,
-                                              device->physical_device->rad_info.chip_class,
-                                              NULL, 0,
-                                              family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
-                                              RADV_CMD_FLAG_INV_ICACHE |
-                                              RADV_CMD_FLAG_INV_SMEM_L1 |
-                                              RADV_CMD_FLAG_INV_VMEM_L1 |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
-                       break;
-               }
-               device->ws->cs_finalize(device->flush_cs[family]);
-
-               device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
-               switch (family) {
-               case RADV_QUEUE_GENERAL:
-               case RADV_QUEUE_COMPUTE:
-                       si_cs_emit_cache_flush(device->flush_shader_cs[family],
-                                              false,
-                                              device->physical_device->rad_info.chip_class,
-                                              NULL, 0,
-                                              family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
-                                              family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
-                                              RADV_CMD_FLAG_INV_ICACHE |
-                                              RADV_CMD_FLAG_INV_SMEM_L1 |
-                                              RADV_CMD_FLAG_INV_VMEM_L1 |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
-                       break;
-               }
-               device->ws->cs_finalize(device->flush_shader_cs[family]);
-       }
-
-       if (getenv("RADV_TRACE_FILE")) {
-               device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
-                                                            RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
-               if (!device->trace_bo)
-                       goto fail;
-
-               device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
-               if (!device->trace_id_ptr)
-                       goto fail;
        }
 
        if (device->physical_device->rad_info.chip_class >= CIK)
@@ -1258,58 +1231,15 @@ void radv_DestroyDevice(
                        vk_free(&device->alloc, device->queues[i]);
                if (device->empty_cs[i])
                        device->ws->cs_destroy(device->empty_cs[i]);
-               if (device->flush_cs[i])
-                       device->ws->cs_destroy(device->flush_cs[i]);
-               if (device->flush_shader_cs[i])
-                       device->ws->cs_destroy(device->flush_shader_cs[i]);
        }
        radv_device_finish_meta(device);
 
        VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
        radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
 
-       vk_free(&device->alloc, device);
-}
-
-VkResult radv_EnumerateInstanceExtensionProperties(
-       const char*                                 pLayerName,
-       uint32_t*                                   pPropertyCount,
-       VkExtensionProperties*                      pProperties)
-{
-       if (pProperties == NULL) {
-               *pPropertyCount = ARRAY_SIZE(instance_extensions);
-               return VK_SUCCESS;
-       }
-
-       *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
-       typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
-
-       if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
-               return VK_INCOMPLETE;
-
-       return VK_SUCCESS;
-}
-
-VkResult radv_EnumerateDeviceExtensionProperties(
-       VkPhysicalDevice                            physicalDevice,
-       const char*                                 pLayerName,
-       uint32_t*                                   pPropertyCount,
-       VkExtensionProperties*                      pProperties)
-{
-       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
-
-       if (pProperties == NULL) {
-               *pPropertyCount = pdevice->extensions.num_ext;
-               return VK_SUCCESS;
-       }
-
-       *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
-       typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
-
-       if (*pPropertyCount < pdevice->extensions.num_ext)
-               return VK_INCOMPLETE;
+       radv_destroy_shader_slabs(device);
 
-       return VK_SUCCESS;
+       vk_free(&device->alloc, device);
 }
 
 VkResult radv_EnumerateInstanceLayerProperties(
@@ -1350,21 +1280,6 @@ void radv_GetDeviceQueue(
        *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
 }
 
-static void radv_dump_trace(struct radv_device *device,
-                           struct radeon_winsys_cs *cs)
-{
-       const char *filename = getenv("RADV_TRACE_FILE");
-       FILE *f = fopen(filename, "w");
-       if (!f) {
-               fprintf(stderr, "Failed to write trace dump to %s\n", filename);
-               return;
-       }
-
-       fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
-       device->ws->cs_dump(cs, f, *device->trace_id_ptr);
-       fclose(f);
-}
-
 static void
 fill_geom_tess_rings(struct radv_queue *queue,
                     uint32_t *map,
@@ -1383,13 +1298,13 @@ fill_geom_tess_rings(struct radv_queue *queue,
        uint32_t *desc = &map[4];
 
        if (esgs_ring_bo)
-               esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
+               esgs_va = radv_buffer_get_va(esgs_ring_bo);
        if (gsvs_ring_bo)
-               gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
+               gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
        if (tess_factor_ring_bo)
-               tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
+               tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
        if (tess_offchip_ring_bo)
-               tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
+               tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);
 
        /* stride 0, num records - size, add tid, swizzle, elsize4,
           index stride 64 */
@@ -1567,6 +1482,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t gsvs_ring_size,
                     bool needs_tess_rings,
                     bool needs_sample_positions,
+                    struct radeon_winsys_cs **initial_full_flush_preamble_cs,
                      struct radeon_winsys_cs **initial_preamble_cs,
                      struct radeon_winsys_cs **continue_preamble_cs)
 {
@@ -1577,11 +1493,12 @@ radv_get_preamble_cs(struct radv_queue *queue,
        struct radeon_winsys_bo *gsvs_ring_bo = NULL;
        struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
        struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
-       struct radeon_winsys_cs *dest_cs[2] = {0};
+       struct radeon_winsys_cs *dest_cs[3] = {0};
        bool add_tess_rings = false, add_sample_positions = false;
        unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
        unsigned max_offchip_buffers;
        unsigned hs_offchip_param = 0;
+       uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
        if (!queue->has_tess_rings) {
                if (needs_tess_rings)
                        add_tess_rings = true;
@@ -1602,6 +1519,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
            gsvs_ring_size <= queue->gsvs_ring_size &&
            !add_tess_rings && !add_sample_positions &&
            queue->initial_preamble_cs) {
+               *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
                *initial_preamble_cs = queue->initial_preamble_cs;
                *continue_preamble_cs = queue->continue_preamble_cs;
                if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1614,7 +1532,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                              scratch_size,
                                                              4096,
                                                              RADEON_DOMAIN_VRAM,
-                                                             RADEON_FLAG_NO_CPU_ACCESS);
+                                                             ring_bo_flags);
                if (!scratch_bo)
                        goto fail;
        } else
@@ -1625,7 +1543,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                      compute_scratch_size,
                                                                      4096,
                                                                      RADEON_DOMAIN_VRAM,
-                                                                     RADEON_FLAG_NO_CPU_ACCESS);
+                                                                     ring_bo_flags);
                if (!compute_scratch_bo)
                        goto fail;
 
@@ -1637,7 +1555,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                esgs_ring_size,
                                                                4096,
                                                                RADEON_DOMAIN_VRAM,
-                                                               RADEON_FLAG_NO_CPU_ACCESS);
+                                                               ring_bo_flags);
                if (!esgs_ring_bo)
                        goto fail;
        } else {
@@ -1650,7 +1568,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                gsvs_ring_size,
                                                                4096,
                                                                RADEON_DOMAIN_VRAM,
-                                                               RADEON_FLAG_NO_CPU_ACCESS);
+                                                               ring_bo_flags);
                if (!gsvs_ring_bo)
                        goto fail;
        } else {
@@ -1663,14 +1581,14 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                       tess_factor_ring_size,
                                                                       256,
                                                                       RADEON_DOMAIN_VRAM,
-                                                                      RADEON_FLAG_NO_CPU_ACCESS);
+                                                                      ring_bo_flags);
                if (!tess_factor_ring_bo)
                        goto fail;
                tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                       tess_offchip_ring_size,
                                                                       256,
                                                                       RADEON_DOMAIN_VRAM,
-                                                                      RADEON_FLAG_NO_CPU_ACCESS);
+                                                                       ring_bo_flags);
                if (!tess_offchip_ring_bo)
                        goto fail;
        } else {
@@ -1697,13 +1615,15 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                                 size,
                                                                 4096,
                                                                 RADEON_DOMAIN_VRAM,
-                                                                RADEON_FLAG_CPU_ACCESS);
+                                                                RADEON_FLAG_CPU_ACCESS |
+                                                                RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                                                                RADEON_FLAG_READ_ONLY);
                if (!descriptor_bo)
                        goto fail;
        } else
                descriptor_bo = queue->descriptor_bo;
 
-       for(int i = 0; i < 2; ++i) {
+       for(int i = 0; i < 3; ++i) {
                struct radeon_winsys_cs *cs = NULL;
                cs = queue->device->ws->cs_create(queue->device->ws,
                                                  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
@@ -1713,28 +1633,28 @@ radv_get_preamble_cs(struct radv_queue *queue,
                dest_cs[i] = cs;
 
                if (scratch_bo)
-                       queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
 
                if (esgs_ring_bo)
-                       queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
 
                if (gsvs_ring_bo)
-                       queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
 
                if (tess_factor_ring_bo)
-                       queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
 
                if (tess_offchip_ring_bo)
-                       queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
 
                if (descriptor_bo)
-                       queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
 
                if (descriptor_bo != queue->descriptor_bo) {
                        uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
 
                        if (scratch_bo) {
-                               uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
+                               uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
                                uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                                 S_008F04_SWIZZLE_ENABLE(1);
                                map[0] = scratch_va;
@@ -1772,7 +1692,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                }
 
                if (tess_factor_ring_bo) {
-                       uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
+                       uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
                        if (queue->device->physical_device->rad_info.chip_class >= CIK) {
                                radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
                                                       S_030938_SIZE(tess_factor_ring_size / 4));
@@ -1794,35 +1714,59 @@ radv_get_preamble_cs(struct radv_queue *queue,
                }
 
                if (descriptor_bo) {
-                       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
-                                          R_00B130_SPI_SHADER_USER_DATA_VS_0,
-                                          R_00B230_SPI_SHADER_USER_DATA_GS_0,
-                                          R_00B330_SPI_SHADER_USER_DATA_ES_0,
-                                          R_00B430_SPI_SHADER_USER_DATA_HS_0,
-                                          R_00B530_SPI_SHADER_USER_DATA_LS_0};
-
-                       uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
-
-                       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
-                               radeon_set_sh_reg_seq(cs, regs[i], 2);
-                               radeon_emit(cs, va);
-                               radeon_emit(cs, va >> 32);
+                       uint64_t va = radv_buffer_get_va(descriptor_bo);
+                       if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+                               uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
+                                               R_00B130_SPI_SHADER_USER_DATA_VS_0,
+                                               R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+                                               R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+
+                               for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+                                       radeon_set_sh_reg_seq(cs, regs[i], 2);
+                                       radeon_emit(cs, va);
+                                       radeon_emit(cs, va >> 32);
+                               }
+                       } else {
+                               uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
+                                               R_00B130_SPI_SHADER_USER_DATA_VS_0,
+                                               R_00B230_SPI_SHADER_USER_DATA_GS_0,
+                                               R_00B330_SPI_SHADER_USER_DATA_ES_0,
+                                               R_00B430_SPI_SHADER_USER_DATA_HS_0,
+                                               R_00B530_SPI_SHADER_USER_DATA_LS_0};
+
+                               for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+                                       radeon_set_sh_reg_seq(cs, regs[i], 2);
+                                       radeon_emit(cs, va);
+                                       radeon_emit(cs, va >> 32);
+                               }
                        }
                }
 
                if (compute_scratch_bo) {
-                       uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
+                       uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
                        uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                         S_008F04_SWIZZLE_ENABLE(1);
 
-                       queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
+                       radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
 
                        radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
                        radeon_emit(cs, scratch_va);
                        radeon_emit(cs, rsrc1);
                }
 
-               if (!i) {
+               if (i == 0) {
+                       si_cs_emit_cache_flush(cs,
+                                              false,
+                                              queue->device->physical_device->rad_info.chip_class,
+                                              NULL, 0,
+                                              queue->queue_family_index == RING_COMPUTE &&
+                                                queue->device->physical_device->rad_info.chip_class >= CIK,
+                                              (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+                                              RADV_CMD_FLAG_INV_ICACHE |
+                                              RADV_CMD_FLAG_INV_SMEM_L1 |
+                                              RADV_CMD_FLAG_INV_VMEM_L1 |
+                                              RADV_CMD_FLAG_INV_GLOBAL_L2);
+               } else if (i == 1) {
                        si_cs_emit_cache_flush(cs,
                                               false,
                                               queue->device->physical_device->rad_info.chip_class,
@@ -1839,14 +1783,18 @@ radv_get_preamble_cs(struct radv_queue *queue,
                        goto fail;
        }
 
+       if (queue->initial_full_flush_preamble_cs)
+                       queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
+
        if (queue->initial_preamble_cs)
                        queue->device->ws->cs_destroy(queue->initial_preamble_cs);
 
        if (queue->continue_preamble_cs)
                        queue->device->ws->cs_destroy(queue->continue_preamble_cs);
 
-       queue->initial_preamble_cs = dest_cs[0];
-       queue->continue_preamble_cs = dest_cs[1];
+       queue->initial_full_flush_preamble_cs = dest_cs[0];
+       queue->initial_preamble_cs = dest_cs[1];
+       queue->continue_preamble_cs = dest_cs[2];
 
        if (scratch_bo != queue->scratch_bo) {
                if (queue->scratch_bo)
@@ -1895,6 +1843,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
        if (add_sample_positions)
                queue->has_sample_positions = true;
 
+       *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
        *initial_preamble_cs = queue->initial_preamble_cs;
        *continue_preamble_cs = queue->continue_preamble_cs;
        if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
@@ -1918,18 +1867,20 @@ fail:
                queue->device->ws->buffer_destroy(tess_factor_ring_bo);
        if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
                queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
-       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+       return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
 }
 
 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
                                      int num_sems,
                                      const VkSemaphore *sems,
+                                     VkFence _fence,
                                      bool reset_temp)
 {
        int syncobj_idx = 0, sem_idx = 0;
 
-       if (num_sems == 0)
+       if (num_sems == 0 && _fence == VK_NULL_HANDLE)
                return VK_SUCCESS;
+
        for (uint32_t i = 0; i < num_sems; i++) {
                RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
 
@@ -1939,17 +1890,23 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
                        counts->sem_count++;
        }
 
+       if (_fence != VK_NULL_HANDLE) {
+               RADV_FROM_HANDLE(radv_fence, fence, _fence);
+               if (fence->temp_syncobj || fence->syncobj)
+                       counts->syncobj_count++;
+       }
+
        if (counts->syncobj_count) {
                counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
                if (!counts->syncobj)
-                       return VK_ERROR_OUT_OF_HOST_MEMORY;
+                       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
        }
 
        if (counts->sem_count) {
                counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
                if (!counts->sem) {
                        free(counts->syncobj);
-                       return VK_ERROR_OUT_OF_HOST_MEMORY;
+                       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
                }
        }
 
@@ -1958,10 +1915,6 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
 
                if (sem->temp_syncobj) {
                        counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
-                       if (reset_temp) {
-                               /* after we wait on a temp import - drop it */
-                               sem->temp_syncobj = 0;
-                       }
                }
                else if (sem->syncobj)
                        counts->syncobj[syncobj_idx++] = sem->syncobj;
@@ -1971,6 +1924,14 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
                }
        }
 
+       if (_fence != VK_NULL_HANDLE) {
+               RADV_FROM_HANDLE(radv_fence, fence, _fence);
+               if (fence->temp_syncobj)
+                       counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
+               else if (fence->syncobj)
+                       counts->syncobj[syncobj_idx++] = fence->syncobj;
+       }
+
        return VK_SUCCESS;
 }
 
@@ -1982,19 +1943,35 @@ void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
        free(sem_info->signal.sem);
 }
 
+/* Destroy each listed semaphore's temp_syncobj (when set) via the winsys and reset it to 0. */
+static void radv_free_temp_syncobjs(struct radv_device *device,
+                                   int num_sems,
+                                   const VkSemaphore *sems)
+{
+       for (uint32_t i = 0; i < num_sems; i++) {
+               RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
+               /* A zero temp_syncobj is skipped; nothing is destroyed for that semaphore. */
+               if (sem->temp_syncobj) {
+                       device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
+                       sem->temp_syncobj = 0;
+               }
+       }
+}
+
 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
                             int num_wait_sems,
                             const VkSemaphore *wait_sems,
                             int num_signal_sems,
-                            const VkSemaphore *signal_sems)
+                            const VkSemaphore *signal_sems,
+                            VkFence fence)
 {
        VkResult ret;
        memset(sem_info, 0, sizeof(*sem_info));
 
-       ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
+       ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
        if (ret)
                return ret;
-       ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
+       ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
        if (ret)
                radv_free_sem_info(sem_info);
 
@@ -2019,7 +1996,7 @@ VkResult radv_QueueSubmit(
        uint32_t scratch_size = 0;
        uint32_t compute_scratch_size = 0;
        uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
-       struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
+       struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
        VkResult result;
        bool fence_emitted = false;
        bool tess_rings_needed = false;
@@ -2044,7 +2021,7 @@ VkResult radv_QueueSubmit(
 
        result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
                                      esgs_ring_size, gsvs_ring_size, tess_rings_needed,
-                                     sample_positions_needed,
+                                     sample_positions_needed, &initial_flush_preamble_cs,
                                      &initial_preamble_cs, &continue_preamble_cs);
        if (result != VK_SUCCESS)
                return result;
@@ -2052,7 +2029,7 @@ VkResult radv_QueueSubmit(
        for (uint32_t i = 0; i < submitCount; i++) {
                struct radeon_winsys_cs **cs_array;
                bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
-               bool can_patch = !do_flush;
+               bool can_patch = true;
                uint32_t advance;
                struct radv_winsys_sem_info sem_info;
 
@@ -2060,7 +2037,8 @@ VkResult radv_QueueSubmit(
                                             pSubmits[i].waitSemaphoreCount,
                                             pSubmits[i].pWaitSemaphores,
                                             pSubmits[i].signalSemaphoreCount,
-                                            pSubmits[i].pSignalSemaphores);
+                                            pSubmits[i].pSignalSemaphores,
+                                            _fence);
                if (result != VK_SUCCESS)
                        return result;
 
@@ -2082,35 +2060,33 @@ VkResult radv_QueueSubmit(
                }
 
                cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
-                                               (pSubmits[i].commandBufferCount + do_flush));
-
-               if(do_flush)
-                       cs_array[0] = pSubmits[i].waitSemaphoreCount ?
-                               queue->device->flush_shader_cs[queue->queue_family_index] :
-                               queue->device->flush_cs[queue->queue_family_index];
+                                               (pSubmits[i].commandBufferCount));
 
                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);
                        assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
-                       cs_array[j + do_flush] = cmd_buffer->cs;
+                       cs_array[j] = cmd_buffer->cs;
                        if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                                can_patch = false;
+
+                       cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
                }
 
-               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
+               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
+                       struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
                        advance = MIN2(max_cs_submission,
-                                      pSubmits[i].commandBufferCount + do_flush - j);
+                                      pSubmits[i].commandBufferCount - j);
 
                        if (queue->device->trace_bo)
                                *queue->device->trace_id_ptr = 0;
 
                        sem_info.cs_emit_wait = j == 0;
-                       sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount + do_flush;
+                       sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
 
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
-                                                       advance, initial_preamble_cs, continue_preamble_cs,
+                                                       advance, initial_preamble, continue_preamble_cs,
                                                           &sem_info,
                                                        can_patch, base_fence);
 
@@ -2120,30 +2096,31 @@ VkResult radv_QueueSubmit(
                        }
                        fence_emitted = true;
                        if (queue->device->trace_bo) {
-                               bool success = queue->device->ws->ctx_wait_idle(
-                                                       queue->hw_ctx,
-                                                       radv_queue_family_to_ring(
-                                                               queue->queue_family_index),
-                                                       queue->queue_idx);
-
-                               if (!success) { /* Hang */
-                                       radv_dump_trace(queue->device, cs_array[j]);
-                                       abort();
-                               }
+                               radv_check_gpu_hangs(queue, cs_array[j]);
                        }
                }
 
+               radv_free_temp_syncobjs(queue->device,
+                                       pSubmits[i].waitSemaphoreCount,
+                                       pSubmits[i].pWaitSemaphores);
                radv_free_sem_info(&sem_info);
                free(cs_array);
        }
 
        if (fence) {
                if (!fence_emitted) {
-                       struct radv_winsys_sem_info sem_info = {0};
+                       struct radv_winsys_sem_info sem_info;
+
+                       result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
+                                                    _fence);
+                       if (result != VK_SUCCESS)
+                               return result;
+
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                           &queue->device->empty_cs[queue->queue_family_index],
                                                           1, NULL, NULL, &sem_info,
                                                           false, base_fence);
+                       radv_free_sem_info(&sem_info);
                }
                fence->submitted = true;
        }
@@ -2220,17 +2197,16 @@ bool radv_get_memory_fd(struct radv_device *device,
                                         pFD);
 }
 
-VkResult radv_AllocateMemory(
-       VkDevice                                    _device,
-       const VkMemoryAllocateInfo*                 pAllocateInfo,
-       const VkAllocationCallbacks*                pAllocator,
-       VkDeviceMemory*                             pMem)
+static VkResult radv_alloc_memory(struct radv_device *device,
+                                 const VkMemoryAllocateInfo*     pAllocateInfo,
+                                 const VkAllocationCallbacks*    pAllocator,
+                                 VkDeviceMemory*                 pMem)
 {
-       RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_device_memory *mem;
        VkResult result;
        enum radeon_bo_domain domain;
        uint32_t flags = 0;
+       enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
 
        assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
 
@@ -2244,12 +2220,20 @@ VkResult radv_AllocateMemory(
                vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
        const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
                vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
+       const VkExportMemoryAllocateInfoKHR *export_info =
+               vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
+
+       const struct wsi_memory_allocate_info *wsi_info =
+               vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
 
        mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (mem == NULL)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
+       if (wsi_info && wsi_info->implicit_sync)
+               flags |= RADEON_FLAG_IMPLICIT_SYNC;
+
        if (dedicate_info) {
                mem->image = radv_image_from_handle(dedicate_info->image);
                mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
@@ -2260,7 +2244,9 @@ VkResult radv_AllocateMemory(
 
        if (import_info) {
                assert(import_info->handleType ==
-                      VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
+                      VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
+                      import_info->handleType ==
+                      VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
                mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
                                                     NULL, NULL);
                if (!mem->bo) {
@@ -2273,20 +2259,23 @@ VkResult radv_AllocateMemory(
        }
 
        uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
-       if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
-           pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
+       if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
+           mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
                domain = RADEON_DOMAIN_GTT;
        else
                domain = RADEON_DOMAIN_VRAM;
 
-       if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
+       if (mem_type_index == RADV_MEM_TYPE_VRAM)
                flags |= RADEON_FLAG_NO_CPU_ACCESS;
        else
                flags |= RADEON_FLAG_CPU_ACCESS;
 
-       if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+       if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
                flags |= RADEON_FLAG_GTT_WC;
 
+       if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
+               flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+
        mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
                                               domain, flags);
 
@@ -2294,7 +2283,7 @@ VkResult radv_AllocateMemory(
                result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
                goto fail;
        }
-       mem->type_index = pAllocateInfo->memoryTypeIndex;
+       mem->type_index = mem_type_index;
 out_success:
        *pMem = radv_device_memory_to_handle(mem);
 
@@ -2306,6 +2295,16 @@ fail:
        return result;
 }
 
+VkResult radv_AllocateMemory(
+       VkDevice                                    _device,
+       const VkMemoryAllocateInfo*                 pAllocateInfo,
+       const VkAllocationCallbacks*                pAllocator,
+       VkDeviceMemory*                             pMem)
+{
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
+}
+
 void radv_FreeMemory(
        VkDevice                                    _device,
        VkDeviceMemory                              _mem,
@@ -2345,7 +2344,7 @@ VkResult radv_MapMemory(
                return VK_SUCCESS;
        }
 
-       return VK_ERROR_MEMORY_MAP_FAILED;
+       return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
 }
 
 void radv_UnmapMemory(
@@ -2378,13 +2377,14 @@ VkResult radv_InvalidateMappedMemoryRanges(
 }
 
 void radv_GetBufferMemoryRequirements(
-       VkDevice                                    device,
+       VkDevice                                    _device,
        VkBuffer                                    _buffer,
        VkMemoryRequirements*                       pMemoryRequirements)
 {
+       RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
 
-       pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+       pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
 
        if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
                pMemoryRequirements->alignment = 4096;
@@ -2401,13 +2401,13 @@ void radv_GetBufferMemoryRequirements2KHR(
 {
        radv_GetBufferMemoryRequirements(device, pInfo->buffer,
                                         &pMemoryRequirements->memoryRequirements);
-
+       RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
        vk_foreach_struct(ext, pMemoryRequirements->pNext) {
                switch (ext->sType) {
                case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
                        VkMemoryDedicatedRequirementsKHR *req =
                                       (VkMemoryDedicatedRequirementsKHR *) ext;
-                       req->requiresDedicatedAllocation = false;
+                       req->requiresDedicatedAllocation = buffer->shareable;
                        req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
                        break;
                }
@@ -2418,13 +2418,14 @@ void radv_GetBufferMemoryRequirements2KHR(
 }
 
 void radv_GetImageMemoryRequirements(
-       VkDevice                                    device,
+       VkDevice                                    _device,
        VkImage                                     _image,
        VkMemoryRequirements*                       pMemoryRequirements)
 {
+       RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_image, image, _image);
 
-       pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+       pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
 
        pMemoryRequirements->size = image->size;
        pMemoryRequirements->alignment = image->alignment;
@@ -2481,44 +2482,74 @@ void radv_GetDeviceMemoryCommitment(
        *pCommittedMemoryInBytes = 0;
 }
 
+VkResult radv_BindBufferMemory2KHR(VkDevice device,
+                                   uint32_t bindInfoCount,
+                                   const VkBindBufferMemoryInfoKHR *pBindInfos)
+{
+       /* Attach each listed buffer to the BO of its memory object at the
+        * requested offset; always returns VK_SUCCESS. */
+       for (uint32_t i = 0; i < bindInfoCount; ++i) {
+               RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+               RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
+
+               /* A null memory handle unbinds the buffer: clear the offset as
+                * well so a stale value does not outlive the BO pointer. */
+               buffer->bo = mem ? mem->bo : NULL;
+               buffer->offset = mem ? pBindInfos[i].memoryOffset : 0;
+       }
+       return VK_SUCCESS;
+}
+
 VkResult radv_BindBufferMemory(
        VkDevice                                    device,
-       VkBuffer                                    _buffer,
-       VkDeviceMemory                              _memory,
+       VkBuffer                                    buffer,
+       VkDeviceMemory                              memory,
        VkDeviceSize                                memoryOffset)
 {
-       RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
-       RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+       const VkBindBufferMemoryInfoKHR info = {
+               .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
+               .buffer = buffer,
+               .memory = memory,
+               .memoryOffset = memoryOffset
+       };
 
-       if (mem) {
-               buffer->bo = mem->bo;
-               buffer->offset = memoryOffset;
-       } else {
-               buffer->bo = NULL;
-               buffer->offset = 0;
-       }
+       return radv_BindBufferMemory2KHR(device, 1, &info);
+}
+
+VkResult radv_BindImageMemory2KHR(VkDevice device,
+                                  uint32_t bindInfoCount,
+                                  const VkBindImageMemoryInfoKHR *pBindInfos)
+{
+       for (uint32_t i = 0; i < bindInfoCount; ++i) {
+               RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+               RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
 
+               if (mem) {
+                       image->bo = mem->bo;
+                       image->offset = pBindInfos[i].memoryOffset;
+               } else {
+                       image->bo = NULL;
+                       image->offset = 0;
+               }
+       }
        return VK_SUCCESS;
 }
 
+
 VkResult radv_BindImageMemory(
        VkDevice                                    device,
-       VkImage                                     _image,
-       VkDeviceMemory                              _memory,
+       VkImage                                     image,
+       VkDeviceMemory                              memory,
        VkDeviceSize                                memoryOffset)
 {
-       RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
-       RADV_FROM_HANDLE(radv_image, image, _image);
-
-       if (mem) {
-               image->bo = mem->bo;
-               image->offset = memoryOffset;
-       } else {
-               image->bo = NULL;
-               image->offset = 0;
-       }
+       const VkBindImageMemoryInfoKHR info = {
+               .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
+               .image = image,
+               .memory = memory,
+               .memoryOffset = memoryOffset
+       };
 
-       return VK_SUCCESS;
+       return radv_BindImageMemory2KHR(device, 1, &info);
 }
 
 
@@ -2590,7 +2621,8 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
                                             pBindInfo[i].waitSemaphoreCount,
                                             pBindInfo[i].pWaitSemaphores,
                                             pBindInfo[i].signalSemaphoreCount,
-                                            pBindInfo[i].pSignalSemaphores);
+                                            pBindInfo[i].pSignalSemaphores,
+                                            _fence);
                if (result != VK_SUCCESS)
                        return result;
 
@@ -2623,20 +2655,38 @@ VkResult radv_CreateFence(
        VkFence*                                    pFence)
 {
        RADV_FROM_HANDLE(radv_device, device, _device);
+       const VkExportFenceCreateInfoKHR *export =
+               vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
+       VkExternalFenceHandleTypeFlagsKHR handleTypes =
+               export ? export->handleTypes : 0;
+
        struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
                                               sizeof(*fence), 8,
                                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 
        if (!fence)
-               return VK_ERROR_OUT_OF_HOST_MEMORY;
+               return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-       memset(fence, 0, sizeof(*fence));
        fence->submitted = false;
        fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
-       fence->fence = device->ws->create_fence();
-       if (!fence->fence) {
-               vk_free2(&device->alloc, pAllocator, fence);
-               return VK_ERROR_OUT_OF_HOST_MEMORY;
+       fence->temp_syncobj = 0;
+       if (handleTypes) {
+               int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
+               if (ret) {
+                       vk_free2(&device->alloc, pAllocator, fence);
+                       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+               }
+               if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+                       device->ws->signal_syncobj(device->ws, fence->syncobj);
+               }
+               fence->fence = NULL;
+       } else {
+               fence->fence = device->ws->create_fence();
+               if (!fence->fence) {
+                       vk_free2(&device->alloc, pAllocator, fence);
+                       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+               }
+               fence->syncobj = 0;
        }
 
        *pFence = radv_fence_to_handle(fence);
@@ -2654,7 +2704,13 @@ void radv_DestroyFence(
 
        if (!fence)
                return;
-       device->ws->destroy_fence(fence->fence);
+
+       if (fence->temp_syncobj)
+               device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
+       if (fence->syncobj)
+               device->ws->destroy_syncobj(device->ws, fence->syncobj);
+       if (fence->fence)
+               device->ws->destroy_fence(fence->fence);
        vk_free2(&device->alloc, pAllocator, fence);
 }
 
@@ -2689,6 +2745,18 @@ VkResult radv_WaitForFences(
                RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
                bool expired = false;
 
+               if (fence->temp_syncobj) {
+                       if (!device->ws->wait_syncobj(device->ws, fence->temp_syncobj, timeout))
+                               return VK_TIMEOUT;
+                       continue;
+               }
+
+               if (fence->syncobj) {
+                       if (!device->ws->wait_syncobj(device->ws, fence->syncobj, timeout))
+                               return VK_TIMEOUT;
+                       continue;
+               }
+
                if (fence->signalled)
                        continue;
 
@@ -2705,13 +2773,26 @@ VkResult radv_WaitForFences(
        return VK_SUCCESS;
 }
 
-VkResult radv_ResetFences(VkDevice device,
+VkResult radv_ResetFences(VkDevice _device,
                          uint32_t fenceCount,
                          const VkFence *pFences)
 {
+       RADV_FROM_HANDLE(radv_device, device, _device);
+
        for (unsigned i = 0; i < fenceCount; ++i) {
                RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
                fence->submitted = fence->signalled = false;
+
+               /* Per spec, we first restore the permanent payload, and then reset, so
+                * having a temp syncobj should not skip resetting the permanent syncobj. */
+               if (fence->temp_syncobj) {
+                       device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
+                       fence->temp_syncobj = 0;
+               }
+
+               if (fence->syncobj) {
+                       device->ws->reset_syncobj(device->ws, fence->syncobj);
+               }
        }
 
        return VK_SUCCESS;
@@ -2722,11 +2803,20 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_fence, fence, _fence);
 
+       if (fence->temp_syncobj) {
+                       bool success = device->ws->wait_syncobj(device->ws, fence->temp_syncobj, 0);
+                       return success ? VK_SUCCESS : VK_NOT_READY;
+       }
+
+       if (fence->syncobj) {
+                       bool success = device->ws->wait_syncobj(device->ws, fence->syncobj, 0);
+                       return success ? VK_SUCCESS : VK_NOT_READY;
+       }
+
        if (fence->signalled)
                return VK_SUCCESS;
        if (!fence->submitted)
                return VK_NOT_READY;
-
        if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
                return VK_NOT_READY;
 
@@ -2752,24 +2842,23 @@ VkResult radv_CreateSemaphore(
                                               sizeof(*sem), 8,
                                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (!sem)
-               return VK_ERROR_OUT_OF_HOST_MEMORY;
+               return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
        sem->temp_syncobj = 0;
        /* create a syncobject if we are going to export this semaphore */
        if (handleTypes) {
                assert (device->physical_device->rad_info.has_syncobj);
-               assert (handleTypes == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
                int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
                if (ret) {
                        vk_free2(&device->alloc, pAllocator, sem);
-                       return VK_ERROR_OUT_OF_HOST_MEMORY;
+                       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
                }
                sem->sem = NULL;
        } else {
                sem->sem = device->ws->create_sem(device->ws);
                if (!sem->sem) {
                        vk_free2(&device->alloc, pAllocator, sem);
-                       return VK_ERROR_OUT_OF_HOST_MEMORY;
+                       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
                }
                sem->syncobj = 0;
        }
@@ -2807,14 +2896,14 @@ VkResult radv_CreateEvent(
                                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 
        if (!event)
-               return VK_ERROR_OUT_OF_HOST_MEMORY;
+               return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
        event->bo = device->ws->buffer_create(device->ws, 8, 8,
                                              RADEON_DOMAIN_GTT,
-                                             RADEON_FLAG_CPU_ACCESS);
+                                             RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
        if (!event->bo) {
                vk_free2(&device->alloc, pAllocator, event);
-               return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+               return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
        }
 
        event->map = (uint64_t*)device->ws->buffer_map(event->bo);
@@ -2891,6 +2980,9 @@ VkResult radv_CreateBuffer(
        buffer->offset = 0;
        buffer->flags = pCreateInfo->flags;
 
+       buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
+                                                EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
+
        if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
                buffer->bo = device->ws->buffer_create(device->ws,
                                                       align64(buffer->size, 4096),
@@ -2932,9 +3024,9 @@ si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
                return image->surface.u.legacy.tiling_index[level];
 }
 
-static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
+static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
 {
-       return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
+       return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
 }
 
 static void
@@ -2955,7 +3047,9 @@ radv_initialise_color_surface(struct radv_device *device,
        /* Intensity is implemented as Red, so treat it that way. */
        cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
 
-       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+       va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+
+       cb->cb_color_base = va >> 8;
 
        if (device->physical_device->rad_info.chip_class >= GFX9) {
                struct gfx9_surf_meta_flags meta;
@@ -2969,12 +3063,15 @@ radv_initialise_color_surface(struct radv_device *device,
                        S_028C74_RB_ALIGNED(meta.rb_aligned) |
                        S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
 
-               va += iview->image->surface.u.gfx9.surf_offset >> 8;
+               cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
+               cb->cb_color_base |= iview->image->surface.tile_swizzle;
        } else {
                const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
                unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
 
-               va += level_info->offset;
+               cb->cb_color_base += level_info->offset >> 8;
+               if (level_info->mode == RADEON_SURF_MODE_2D)
+                       cb->cb_color_base |= iview->image->surface.tile_swizzle;
 
                pitch_tile_max = level_info->nblk_x / 8 - 1;
                slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
@@ -2985,7 +3082,6 @@ radv_initialise_color_surface(struct radv_device *device,
                cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
 
                cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
-               cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
 
                if (iview->image->fmask.size) {
                        if (device->physical_device->rad_info.chip_class >= CIK)
@@ -3001,23 +3097,19 @@ radv_initialise_color_surface(struct radv_device *device,
                }
        }
 
-       cb->cb_color_base = va >> 8;
-       if (device->physical_device->rad_info.chip_class < GFX9)
-               cb->cb_color_base |= iview->image->surface.u.legacy.tile_swizzle;
        /* CMASK variables */
-       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+       va = radv_buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->cmask.offset;
        cb->cb_color_cmask = va >> 8;
 
-       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+       va = radv_buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->dcc_offset;
        cb->cb_dcc_base = va >> 8;
-       if (device->physical_device->rad_info.chip_class < GFX9)
-               cb->cb_dcc_base |= iview->image->surface.u.legacy.tile_swizzle;
+       cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
 
-       uint32_t max_slice = radv_surface_layer_count(iview);
+       uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
        cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
-               S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
+               S_028C6C_SLICE_MAX(max_slice);
 
        if (iview->image->info.samples > 1) {
                unsigned log_samples = util_logbase2(iview->image->info.samples);
@@ -3027,10 +3119,9 @@ radv_initialise_color_surface(struct radv_device *device,
        }
 
        if (iview->image->fmask.size) {
-               va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
+               va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
                cb->cb_color_fmask = va >> 8;
-               if (device->physical_device->rad_info.chip_class < GFX9)
-                       cb->cb_color_fmask |= iview->image->surface.u.legacy.tile_swizzle;
+               cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
        } else {
                cb->cb_color_fmask = cb->cb_color_base;
        }
@@ -3077,28 +3168,52 @@ radv_initialise_color_surface(struct radv_device *device,
                                    format != V_028C70_COLOR_24_8) |
                S_028C70_NUMBER_TYPE(ntype) |
                S_028C70_ENDIAN(endian);
-       if (iview->image->info.samples > 1)
-               if (iview->image->fmask.size)
-                       cb->cb_color_info |= S_028C70_COMPRESSION(1);
+       if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
+               cb->cb_color_info |= S_028C70_COMPRESSION(1);
+               if (device->physical_device->rad_info.chip_class == SI) {
+                       unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
+                       cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+               }
+       }
 
        if (iview->image->cmask.size &&
-           !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
+           !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
                cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
 
-       if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
+       if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
                cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
 
        if (device->physical_device->rad_info.chip_class >= VI) {
-               unsigned max_uncompressed_block_size = 2;
+               unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
+               unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
+               unsigned independent_64b_blocks = 0;
+               unsigned max_compressed_block_size;
+
+               /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
+                  64 for APU because all of our APUs to date use DIMMs which have
+                  a request granularity size of 64B while all other chips have a
+                  32B request size */
+               if (!device->physical_device->rad_info.has_dedicated_vram)
+                       min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
+
                if (iview->image->info.samples > 1) {
                        if (iview->image->surface.bpe == 1)
-                               max_uncompressed_block_size = 0;
+                               max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
                        else if (iview->image->surface.bpe == 2)
-                               max_uncompressed_block_size = 1;
+                               max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
                }
 
+               if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                                          VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+                       independent_64b_blocks = 1;
+                       max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+               } else
+                       max_compressed_block_size = max_uncompressed_block_size;
+
                cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
-                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
+                       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
+                       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
+                       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
        }
 
        /* This must be set for fast clear to work without FMASK. */
@@ -3109,18 +3224,15 @@ radv_initialise_color_surface(struct radv_device *device,
        }
 
        if (device->physical_device->rad_info.chip_class >= GFX9) {
-               uint32_t max_slice = radv_surface_layer_count(iview);
-               unsigned mip0_depth = iview->base_layer + max_slice - 1;
+               unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
+                 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
 
                cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
                cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
                        S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
-               cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
-                       S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
-                       S_028C68_MAX_MIP(iview->image->info.levels);
-
-               cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
-
+               cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
+                       S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
+                       S_028C68_MAX_MIP(iview->image->info.levels - 1);
        }
 }
 
@@ -3159,17 +3271,17 @@ radv_initialise_ds_surface(struct radv_device *device,
        }
 
        format = radv_translate_dbformat(iview->image->vk_format);
-       stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
+       stencil_format = iview->image->surface.has_stencil ?
                V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
 
-       uint32_t max_slice = radv_surface_layer_count(iview);
+       uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
        ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
-               S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
+               S_028008_SLICE_MAX(max_slice);
 
        ds->db_htile_data_base = 0;
        ds->db_htile_surface = 0;
 
-       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+       va = radv_buffer_get_va(iview->bo) + iview->image->offset;
        s_offs = z_offs = va;
 
        if (device->physical_device->rad_info.chip_class >= GFX9) {
@@ -3190,14 +3302,25 @@ radv_initialise_ds_surface(struct radv_device *device,
                ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
                        S_02801C_Y_MAX(iview->image->info.height - 1);
 
-               /* Only use HTILE for the first level. */
-               if (iview->image->surface.htile_size && !level) {
+               if (radv_htile_enabled(iview->image, level)) {
                        ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
 
-                       if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+                       if (iview->image->tc_compatible_htile) {
+                               unsigned max_zplanes = 4;
+
+                               if (iview->vk_format == VK_FORMAT_D16_UNORM  &&
+                                   iview->image->info.samples > 1)
+                                       max_zplanes = 2;
+
+                               ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
+                                         S_028038_ITERATE_FLUSH(1);
+                               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+                       }
+
+                       if (!iview->image->surface.has_stencil)
                                /* Use all of the htile_buffer for depth if there's no stencil. */
                                ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
-                       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+                       va = radv_buffer_get_va(iview->bo) + iview->image->offset +
                                iview->image->htile_offset;
                        ds->db_htile_data_base = va >> 8;
                        ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
@@ -3213,7 +3336,7 @@ radv_initialise_ds_surface(struct radv_device *device,
                z_offs += iview->image->surface.u.legacy.level[level].offset;
                s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
 
-               ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+               ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
                ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
                ds->db_stencil_info = S_028044_FORMAT(stencil_format);
 
@@ -3254,17 +3377,29 @@ radv_initialise_ds_surface(struct radv_device *device,
                        S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
                ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
 
-               if (iview->image->surface.htile_size && !level) {
+               if (radv_htile_enabled(iview->image, level)) {
                        ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
 
-                       if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+                       if (!iview->image->surface.has_stencil &&
+                           !iview->image->tc_compatible_htile)
                                /* Use all of the htile_buffer for depth if there's no stencil. */
                                ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
 
-                       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+                       va = radv_buffer_get_va(iview->bo) + iview->image->offset +
                                iview->image->htile_offset;
                        ds->db_htile_data_base = va >> 8;
                        ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+                       if (iview->image->tc_compatible_htile) {
+                               ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+                               if (iview->image->info.samples <= 1)
+                                       ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+                               else if (iview->image->info.samples <= 4)
+                                       ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+                               else
+                                       ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+                       }
                }
        }
 
@@ -3305,7 +3440,7 @@ VkResult radv_CreateFramebuffer(
                }
                framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
                framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
-               framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
+               framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
        }
 
        *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
@@ -3461,7 +3596,7 @@ radv_init_sampler(struct radv_device *device,
                             S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
                             S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
                             S_008F38_MIP_POINT_PRECLAMP(0) |
-                            S_008F38_DISABLE_LSB_CEIL(1) |
+                            S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
                             S_008F38_FILTER_PREC_FIX(1) |
                             S_008F38_ANISO_OVERRIDE(is_vi));
        sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
@@ -3556,13 +3691,15 @@ VkResult radv_GetMemoryFdKHR(VkDevice _device,
 
        assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
 
-       /* We support only one handle type. */
+       /* At the moment, we support only the below handle types. */
        assert(pGetFdInfo->handleType ==
-              VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
+              VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
+              pGetFdInfo->handleType ==
+              VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
 
        bool ret = radv_get_memory_fd(device, memory, pFD);
        if (ret == false)
-               return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+               return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
        return VK_SUCCESS;
 }
 
@@ -3571,13 +3708,68 @@ VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
                                       int fd,
                                       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
 {
-   /* The valid usage section for this function says:
-    *
-    *    "handleType must not be one of the handle types defined as opaque."
-    *
-    * Since we only handle opaque handles for now, there are no FD properties.
-    */
-   return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
+   switch (handleType) {
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+      pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
+      return VK_SUCCESS;
+
+   default:
+      /* The valid usage section for this function says:
+       *
+       *    "handleType must not be one of the handle types defined as
+       *    opaque."
+       *
+       * So opaque handle types fall into the default "unsupported" case.
+       */
+      return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+   }
+}
+
+static VkResult radv_import_opaque_fd(struct radv_device *device,
+                                      int fd,
+                                      uint32_t *syncobj)
+{
+       /* Replaces *syncobj with the syncobj imported from the opaque fd. On failure
+        * *syncobj and fd are left untouched; on success the fd is closed here. */
+       uint32_t imported = 0;
+
+       if (device->ws->import_syncobj(device->ws, fd, &imported))
+               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+
+       if (*syncobj != 0)
+               device->ws->destroy_syncobj(device->ws, *syncobj);
+       *syncobj = imported;
+       close(fd);
+       return VK_SUCCESS;
+}
+
+static VkResult radv_import_sync_fd(struct radv_device *device,
+                                    int fd,
+                                    uint32_t *syncobj)
+{
+       /* Imports a sync file fd into *syncobj; fd == -1 means "already signaled".
+        * If *syncobj is 0 a syncobj is created first. All work happens on a local
+        * handle so that on failure *syncobj is unchanged and fd is not closed; on
+        * success the fd (if any) is closed here. */
+       uint32_t syncobj_handle = *syncobj;
+       const bool created = !syncobj_handle;
+
+       if (created && device->ws->create_syncobj(device->ws, &syncobj_handle))
+               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+
+       if (fd == -1) {
+               device->ws->signal_syncobj(device->ws, syncobj_handle);
+       } else if (device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd)) {
+               /* Don't leak a syncobj that was created just for this import. */
+               if (created)
+                       device->ws->destroy_syncobj(device->ws, syncobj_handle);
+               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+       }
+
+       *syncobj = syncobj_handle;
+       if (fd != -1)
+               close(fd);
+       return VK_SUCCESS;
 }
 
 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
@@ -3585,20 +3777,22 @@ VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
 {
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
-       uint32_t syncobj_handle = 0;
-       assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
-
-       int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
-       if (ret != 0)
-               return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
+       uint32_t *syncobj_dst = NULL;
 
        if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
-               sem->temp_syncobj = syncobj_handle;
+               syncobj_dst = &sem->temp_syncobj;
        } else {
-               sem->syncobj = syncobj_handle;
+               syncobj_dst = &sem->syncobj;
+       }
+
+       switch(pImportSemaphoreFdInfo->handleType) {
+               case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
+                       return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
+               case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
+                       return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
+               default:
+                       unreachable("Unhandled semaphore handle type");
        }
-       close(pImportSemaphoreFdInfo->fd);
-       return VK_SUCCESS;
 }
 
 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
@@ -3610,12 +3804,22 @@ VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
        int ret;
        uint32_t syncobj_handle;
 
-       assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
        if (sem->temp_syncobj)
                syncobj_handle = sem->temp_syncobj;
        else
                syncobj_handle = sem->syncobj;
-       ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
+
+       switch(pGetFdInfo->handleType) {
+       case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
+               ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
+               break;
+       case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
+               ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
+               break;
+       default:
+               unreachable("Unhandled semaphore handle type");
+       }
+
        if (ret)
                return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
        return VK_SUCCESS;
@@ -3626,7 +3830,17 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
        const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
        VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
 {
-       if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
+       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+
+       /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
+       if (pdevice->rad_info.has_syncobj_wait_for_submit &&
+           (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || 
+            pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
+               pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
+               pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
+               pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
+                       VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
+       } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
                pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
                pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
@@ -3637,3 +3851,78 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
                pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
        }
 }
+
+VkResult radv_ImportFenceFdKHR(VkDevice _device,
+                                  const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
+{
+       /* Imports a payload from a file descriptor into either the temporary or the
+        * permanent syncobj slot of the fence, depending on the import flags. */
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
+
+       const bool temporary =
+               (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) != 0;
+       uint32_t *syncobj_dst = temporary ? &fence->temp_syncobj : &fence->syncobj;
+       const int fd = pImportFenceFdInfo->fd;
+
+       /* The import helpers close the fd themselves, and only when they succeed. */
+       switch (pImportFenceFdInfo->handleType) {
+       case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
+               return radv_import_opaque_fd(device, fd, syncobj_dst);
+       case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
+               return radv_import_sync_fd(device, fd, syncobj_dst);
+       default:
+               unreachable("Unhandled fence handle type");
+       }
+}
+
+VkResult radv_GetFenceFdKHR(VkDevice _device,
+                               const VkFenceGetFdInfoKHR *pGetFdInfo,
+                               int *pFd)
+{
+       /* Exports the fence's current payload as a file descriptor written to *pFd.
+        * The temporary syncobj, when present, takes precedence over the permanent
+        * one. Any winsys export failure is reported as
+        * VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR. */
+       RADV_FROM_HANDLE(radv_device, device, _device);
+       RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
+       const uint32_t handle = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
+       int err;
+
+       switch (pGetFdInfo->handleType) {
+       case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
+               err = device->ws->export_syncobj(device->ws, handle, pFd);
+               break;
+       case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
+               err = device->ws->export_syncobj_to_sync_file(device->ws, handle, pFd);
+               break;
+       default:
+               unreachable("Unhandled fence handle type");
+       }
+
+       if (err != 0)
+               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
+
+       return VK_SUCCESS;
+}
+
+void radv_GetPhysicalDeviceExternalFencePropertiesKHR(
+       VkPhysicalDevice                            physicalDevice,
+       const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
+       VkExternalFencePropertiesKHR*           pExternalFenceProperties)
+{
+       /* Opaque-fd and sync-fd fences are reported as importable, exportable and mutually
+        * compatible only when rad_info.has_syncobj_wait_for_submit is set; otherwise nothing. */
+       RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+       if (pdevice->rad_info.has_syncobj_wait_for_submit &&
+           (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
+            pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
+               pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
+               pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
+               pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR | VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
+       } else {
+               pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+               pExternalFenceProperties->compatibleHandleTypes = 0;
+               pExternalFenceProperties->externalFenceFeatures = 0;
+       }
+}