radv: add a new debug option called RADV_DEBUG=noshaderballot
[mesa.git] / src / amd / vulkan / radv_device.c
index d22c43b1098247a4fdf2877f813a070161155628..f77430d55be3d7bbac85ea2f575c1c41389f217f 100644 (file)
@@ -348,7 +348,8 @@ radv_physical_device_init(struct radv_physical_device *device,
                device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
                                         device->rad_info.family == CHIP_VEGA12 ||
                                         device->rad_info.family == CHIP_RAVEN ||
-                                        device->rad_info.family == CHIP_RAVEN2;
+                                        device->rad_info.family == CHIP_RAVEN2 ||
+                                        device->rad_info.family == CHIP_RENOIR;
        }
 
        /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
@@ -362,6 +363,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
                                  device->rad_info.family == CHIP_RAVEN;
 
+       device->has_tc_compat_zrange_bug = device->rad_info.chip_class < GFX10;
+
        /* Out-of-order primitive rasterization. */
        device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 &&
                                        device->rad_info.max_se >= 2;
@@ -376,9 +379,29 @@ radv_physical_device_init(struct radv_physical_device *device,
                                       (device->rad_info.chip_class >= GFX8 &&
                                        device->rad_info.me_fw_feature >= 41);
 
-       device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2;
+       device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 ||
+                                         device->rad_info.family == CHIP_RENOIR ||
+                                         device->rad_info.chip_class >= GFX10;
+
+       device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
+                                   device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+
+       /* Determine the number of threads per wave for all stages. */
+       device->cs_wave_size = 64;
+       device->ps_wave_size = 64;
+       device->ge_wave_size = 64;
 
-       device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+       if (device->rad_info.chip_class >= GFX10) {
+               if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
+                       device->cs_wave_size = 32;
+
+               /* For pixel shaders, wave64 is recommended. */
+               if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
+                       device->ps_wave_size = 32;
+
+               if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
+                       device->ge_wave_size = 32;
+       }
 
        radv_physical_device_init_mem_types(device);
        radv_fill_device_extension_table(device, &device->supported_extensions);
@@ -472,6 +495,8 @@ static const struct debug_control radv_debug_options[] = {
        {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
        {"nobinning", RADV_DEBUG_NOBINNING},
        {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
+       {"nongg", RADV_DEBUG_NO_NGG},
+       {"noshaderballot", RADV_DEBUG_NO_SHADER_BALLOT},
        {NULL, 0}
 };
 
@@ -490,6 +515,9 @@ static const struct debug_control radv_perftest_options[] = {
        {"bolist", RADV_PERFTEST_BO_LIST},
        {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
        {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
+       {"cswave32", RADV_PERFTEST_CS_WAVE_32},
+       {"pswave32", RADV_PERFTEST_PS_WAVE_32},
+       {"gewave32", RADV_PERFTEST_GE_WAVE_32},
        {NULL, 0}
 };
 
@@ -949,11 +977,8 @@ void radv_GetPhysicalDeviceFeatures2(
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
                        VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
                                (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
-                       /* TODO: Enable this once the driver supports 64-bit
-                        * compare&swap atomic operations.
-                        */
-                       features->shaderBufferInt64Atomics = false;
-                       features->shaderSharedInt64Atomics = false;
+                       features->shaderBufferInt64Atomics = HAVE_LLVM >= 0x0900;
+                       features->shaderSharedInt64Atomics = HAVE_LLVM >= 0x0900;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
@@ -983,6 +1008,24 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->uniformBufferStandardLayout = true;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+                       VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+                               (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+                       features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
+                       break;
+               }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR: {
+                       VkPhysicalDeviceImagelessFramebufferFeaturesKHR *features =
+                               (VkPhysicalDeviceImagelessFramebufferFeaturesKHR *)ext;
+                       features->imagelessFramebuffer = true;
+                       break;
+               }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
+                       VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
+                               (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
+                       features->pipelineExecutableInfo = true;
+                       break;
+               }
                default:
                        break;
                }
@@ -1335,10 +1378,7 @@ void radv_GetPhysicalDeviceProperties2(
                                (VkPhysicalDeviceDriverPropertiesKHR *) ext;
 
                        driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
-                       memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
-                       strcpy(driver_props->driverName, "radv");
-
-                       memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
+                       snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv");
                        snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
                                "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
                                " (LLVM " MESA_LLVM_VERSION_STRING ")");
@@ -1862,6 +1902,9 @@ VkResult radv_CreateDevice(
                device->enabled_extensions.EXT_descriptor_indexing ||
                device->enabled_extensions.EXT_buffer_device_address;
 
+       device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
+                                      pCreateInfo->pEnabledFeatures->robustBufferAccess;
+
        mtx_init(&device->shader_slab_mutex, mtx_plain);
        list_inithead(&device->shader_slabs);
 
@@ -1898,10 +1941,10 @@ VkResult radv_CreateDevice(
        device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
                              !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
 
-       /* Disabled and not implemented for now. */
        device->dfsm_allowed = device->pbb_allowed &&
                               (device->physical_device->rad_info.family == CHIP_RAVEN ||
-                               device->physical_device->rad_info.family == CHIP_RAVEN2);
+                               device->physical_device->rad_info.family == CHIP_RAVEN2 ||
+                               device->physical_device->rad_info.family == CHIP_RENOIR);
 
 #ifdef ANDROID
        device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
@@ -1923,7 +1966,8 @@ VkResult radv_CreateDevice(
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
 
-       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
+       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
+                                    S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32);
 
        if (device->physical_device->rad_info.chip_class >= GFX7) {
                /* If the KMD allows it (there is a KMD hw register for it),
@@ -1968,9 +2012,12 @@ VkResult radv_CreateDevice(
                device->empty_cs[family] = device->ws->cs_create(device->ws, family);
                switch (family) {
                case RADV_QUEUE_GENERAL:
-                       radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
-                       radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
-                       radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
+                     /* Since amdgpu version 3.6.0, CONTEXT_CONTROL is emitted by the kernel */
+                       if (device->physical_device->rad_info.drm_minor < 6) {
+                               radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+                               radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
+                               radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
+                       }
                        break;
                case RADV_QUEUE_COMPUTE:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
@@ -2152,36 +2199,44 @@ fill_geom_tess_rings(struct radv_queue *queue,
                   index stride 64 */
                desc[0] = esgs_va;
                desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
-                         S_008F04_STRIDE(0) |
                          S_008F04_SWIZZLE_ENABLE(true);
                desc[2] = esgs_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(1) |
                          S_008F0C_INDEX_STRIDE(3) |
-                         S_008F0C_ADD_TID_ENABLE(true);
+                         S_008F0C_ADD_TID_ENABLE(1);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                  S_008F0C_ELEMENT_SIZE(1);
+               }
 
                /* GS entry for ES->GS ring */
                /* stride 0, num records - size, elsize0,
                   index stride 0 */
                desc[4] = esgs_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
                desc[6] = esgs_ring_size;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
        }
 
        desc += 8;
@@ -2193,37 +2248,46 @@ fill_geom_tess_rings(struct radv_queue *queue,
                /* stride 0, num records - size, elsize0,
                   index stride 0 */
                desc[0] = gsvs_va;
-               desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
                desc[2] = gsvs_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
 
                /* stride gsvs_itemsize, num records 64
                   elsize 4, index stride 16 */
                /* shader will patch stride and desc[2] */
                desc[4] = gsvs_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(true);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
+                         S_008F04_SWIZZLE_ENABLE(1);
                desc[6] = 0;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(1) |
                          S_008F0C_INDEX_STRIDE(1) |
                          S_008F0C_ADD_TID_ENABLE(true);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                  S_008F0C_ELEMENT_SIZE(1);
+               }
+
        }
 
        desc += 8;
@@ -2233,34 +2297,38 @@ fill_geom_tess_rings(struct radv_queue *queue,
                uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
 
                desc[0] = tess_va;
-               desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
                desc[2] = tess_factor_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(3) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
 
                desc[4] = tess_offchip_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
                desc[6] = tess_offchip_ring_size;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(3) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
        }
 
        desc += 8;
@@ -2300,9 +2368,11 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
         *
         * Follow AMDVLK here.
         */
-       if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
-           device->physical_device->rad_info.chip_class == GFX7 ||
-           device->physical_device->rad_info.chip_class == GFX6)
+       if (device->physical_device->rad_info.chip_class >= GFX10) {
+               max_offchip_buffers_per_se = 256;
+       } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+                  device->physical_device->rad_info.chip_class == GFX7 ||
+                  device->physical_device->rad_info.chip_class == GFX6)
                --max_offchip_buffers_per_se;
 
        max_offchip_buffers = max_offchip_buffers_per_se *
@@ -2326,9 +2396,12 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
        case GFX7:
        case GFX8:
        case GFX9:
-       default:
                max_offchip_buffers = MIN2(max_offchip_buffers, 508);
                break;
+       case GFX10:
+               break;
+       default:
+               break;
        }
 
        *max_offchip_buffers_p = max_offchip_buffers;
@@ -2447,14 +2520,14 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
        if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
-                                  R_00B230_SPI_SHADER_USER_DATA_GS_0,
-                                  R_00B430_SPI_SHADER_USER_DATA_HS_0};
+                                  R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+                                  R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
 
                for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
                        radv_emit_shader_pointer(queue->device, cs, regs[i],
                                                 va, true);
                }
-       } else if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+       } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
                                   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
@@ -2704,6 +2777,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
                }
@@ -4297,7 +4371,7 @@ radv_init_dcc_control_reg(struct radv_device *device,
               S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
 }
 
-static void
+void
 radv_initialise_color_surface(struct radv_device *device,
                              struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
@@ -4356,15 +4430,15 @@ radv_initialise_color_surface(struct radv_device *device,
 
                cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
                cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
-               cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
+               cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
 
                cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
 
                if (radv_image_has_fmask(iview->image)) {
                        if (device->physical_device->rad_info.chip_class >= GFX7)
-                               cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
-                       cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
-                       cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
+                               cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
+                       cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
+                       cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
                } else {
                        /* This must be set for fast clear to work without FMASK. */
                        if (device->physical_device->rad_info.chip_class >= GFX7)
@@ -4376,7 +4450,7 @@ radv_initialise_color_surface(struct radv_device *device,
 
        /* CMASK variables */
        va = radv_buffer_get_va(iview->bo) + iview->image->offset;
-       va += iview->image->cmask.offset;
+       va += iview->image->cmask_offset;
        cb->cb_color_cmask = va >> 8;
 
        va = radv_buffer_get_va(iview->bo) + iview->image->offset;
@@ -4386,8 +4460,11 @@ radv_initialise_color_surface(struct radv_device *device,
            device->physical_device->rad_info.chip_class <= GFX8)
                va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
 
+       unsigned dcc_tile_swizzle = surf->tile_swizzle;
+       dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
+
        cb->cb_dcc_base = va >> 8;
-       cb->cb_dcc_base |= surf->tile_swizzle;
+       cb->cb_dcc_base |= dcc_tile_swizzle;
 
        /* GFX10 field has the same base shift as the GFX6 field. */
        uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
@@ -4402,9 +4479,9 @@ radv_initialise_color_surface(struct radv_device *device,
        }
 
        if (radv_image_has_fmask(iview->image)) {
-               va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
+               va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask_offset;
                cb->cb_color_fmask = va >> 8;
-               cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
+               cb->cb_color_fmask |= surf->fmask_tile_swizzle;
        } else {
                cb->cb_color_fmask = cb->cb_color_base;
        }
@@ -4415,7 +4492,7 @@ radv_initialise_color_surface(struct radv_device *device,
        format = radv_translate_colorformat(iview->vk_format);
        if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
                radv_finishme("Illegal color\n");
-       swap = radv_translate_colorswap(iview->vk_format, FALSE);
+       swap = radv_translate_colorswap(iview->vk_format, false);
        endian = radv_colorformat_endian_swap(format);
 
        /* blend clamp should be set for all NORM/SRGB types */
@@ -4454,7 +4531,7 @@ radv_initialise_color_surface(struct radv_device *device,
        if (radv_image_has_fmask(iview->image)) {
                cb->cb_color_info |= S_028C70_COMPRESSION(1);
                if (device->physical_device->rad_info.chip_class == GFX6) {
-                       unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
+                       unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
                        cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
                }
 
@@ -4554,7 +4631,7 @@ radv_calc_decompress_on_z_planes(struct radv_device *device,
        return max_zplanes;
 }
 
-static void
+void
 radv_initialise_ds_surface(struct radv_device *device,
                           struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
@@ -4748,11 +4825,15 @@ VkResult radv_CreateFramebuffer(
 {
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_framebuffer *framebuffer;
+       const VkFramebufferAttachmentsCreateInfoKHR *imageless_create_info =
+               vk_find_struct_const(pCreateInfo->pNext,
+                       FRAMEBUFFER_ATTACHMENTS_CREATE_INFO_KHR);
 
        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
 
-       size_t size = sizeof(*framebuffer) +
-               sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
+       size_t size = sizeof(*framebuffer);
+       if (!imageless_create_info)
+               size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
        framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (framebuffer == NULL)
@@ -4762,18 +4843,23 @@ VkResult radv_CreateFramebuffer(
        framebuffer->width = pCreateInfo->width;
        framebuffer->height = pCreateInfo->height;
        framebuffer->layers = pCreateInfo->layers;
-       for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
-               VkImageView _iview = pCreateInfo->pAttachments[i];
-               struct radv_image_view *iview = radv_image_view_from_handle(_iview);
-               framebuffer->attachments[i].attachment = iview;
-               if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
-                       radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
-               } else {
-                       radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
+       if (imageless_create_info) {
+               for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
+                       const VkFramebufferAttachmentImageInfoKHR *attachment =
+                               imageless_create_info->pAttachmentImageInfos + i;
+                       framebuffer->width = MIN2(framebuffer->width, attachment->width);
+                       framebuffer->height = MIN2(framebuffer->height, attachment->height);
+                       framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
+               }
+       } else {
+               for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+                       VkImageView _iview = pCreateInfo->pAttachments[i];
+                       struct radv_image_view *iview = radv_image_view_from_handle(_iview);
+                       framebuffer->attachments[i] = iview;
+                       framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
+                       framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
+                       framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
                }
-               framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
-               framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
-               framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
        }
 
        *pFramebuffer = radv_framebuffer_to_handle(framebuffer);