radv/gfx10: enable VK_EXT_transform_feedback
[mesa.git] / src / amd / vulkan / radv_device.c
index d22c43b1098247a4fdf2877f813a070161155628..9ba100df6e87b59ed0ac14ea2cbf10fe3c8f1623 100644 (file)
@@ -362,6 +362,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
                                  device->rad_info.family == CHIP_RAVEN;
 
+       device->has_tc_compat_zrange_bug = device->rad_info.chip_class < GFX10;
+
        /* Out-of-order primitive rasterization. */
        device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 &&
                                        device->rad_info.max_se >= 2;
@@ -376,7 +378,8 @@ radv_physical_device_init(struct radv_physical_device *device,
                                       (device->rad_info.chip_class >= GFX8 &&
                                        device->rad_info.me_fw_feature >= 41);
 
-       device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2;
+       device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 ||
+                                         device->rad_info.chip_class >= GFX10;
 
        device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
 
@@ -472,6 +475,7 @@ static const struct debug_control radv_debug_options[] = {
        {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
        {"nobinning", RADV_DEBUG_NOBINNING},
        {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
+       {"nongg", RADV_DEBUG_NO_NGG},
        {NULL, 0}
 };
 
@@ -1335,10 +1339,7 @@ void radv_GetPhysicalDeviceProperties2(
                                (VkPhysicalDeviceDriverPropertiesKHR *) ext;
 
                        driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
-                       memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
-                       strcpy(driver_props->driverName, "radv");
-
-                       memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
+                       snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv");
                        snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
                                "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
                                " (LLVM " MESA_LLVM_VERSION_STRING ")");
@@ -2152,36 +2153,44 @@ fill_geom_tess_rings(struct radv_queue *queue,
                   index stride 64 */
                desc[0] = esgs_va;
                desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
-                         S_008F04_STRIDE(0) |
                          S_008F04_SWIZZLE_ENABLE(true);
                desc[2] = esgs_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(1) |
                          S_008F0C_INDEX_STRIDE(3) |
-                         S_008F0C_ADD_TID_ENABLE(true);
+                         S_008F0C_ADD_TID_ENABLE(1);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                  S_008F0C_ELEMENT_SIZE(1);
+               }
 
                /* GS entry for ES->GS ring */
                /* stride 0, num records - size, elsize0,
                   index stride 0 */
                desc[4] = esgs_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
                desc[6] = esgs_ring_size;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
        }
 
        desc += 8;
@@ -2193,37 +2202,46 @@ fill_geom_tess_rings(struct radv_queue *queue,
                /* stride 0, num records - size, elsize0,
                   index stride 0 */
                desc[0] = gsvs_va;
-               desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
                desc[2] = gsvs_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
 
                /* stride gsvs_itemsize, num records 64
                   elsize 4, index stride 16 */
                /* shader will patch stride and desc[2] */
                desc[4] = gsvs_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(true);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
+                         S_008F04_SWIZZLE_ENABLE(1);
                desc[6] = 0;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(1) |
                          S_008F0C_INDEX_STRIDE(1) |
                          S_008F0C_ADD_TID_ENABLE(true);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(2) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                  S_008F0C_ELEMENT_SIZE(1);
+               }
+
        }
 
        desc += 8;
@@ -2233,34 +2251,38 @@ fill_geom_tess_rings(struct radv_queue *queue,
                uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
 
                desc[0] = tess_va;
-               desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
                desc[2] = tess_factor_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(3) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
 
                desc[4] = tess_offchip_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
                desc[6] = tess_offchip_ring_size;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+               if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+                       desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+                                  S_008F0C_OOB_SELECT(3) |
+                                  S_008F0C_RESOURCE_LEVEL(1);
+               } else {
+                       desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+               }
        }
 
        desc += 8;
@@ -2300,9 +2322,11 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
         *
         * Follow AMDVLK here.
         */
-       if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
-           device->physical_device->rad_info.chip_class == GFX7 ||
-           device->physical_device->rad_info.chip_class == GFX6)
+       if (device->physical_device->rad_info.chip_class >= GFX10) {
+               max_offchip_buffers_per_se = 256;
+       } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+                  device->physical_device->rad_info.chip_class == GFX7 ||
+                  device->physical_device->rad_info.chip_class == GFX6)
                --max_offchip_buffers_per_se;
 
        max_offchip_buffers = max_offchip_buffers_per_se *
@@ -2326,9 +2350,12 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
        case GFX7:
        case GFX8:
        case GFX9:
-       default:
                max_offchip_buffers = MIN2(max_offchip_buffers, 508);
                break;
+       case GFX10:
+               break;
+       default:
+               break;
        }
 
        *max_offchip_buffers_p = max_offchip_buffers;
@@ -2447,14 +2474,14 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
        if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
-                                  R_00B230_SPI_SHADER_USER_DATA_GS_0,
-                                  R_00B430_SPI_SHADER_USER_DATA_HS_0};
+                                  R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+                                  R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
 
                for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
                        radv_emit_shader_pointer(queue->device, cs, regs[i],
                                                 va, true);
                }
-       } else if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+       } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
                                   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
@@ -2704,6 +2731,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
                }
@@ -4386,8 +4414,11 @@ radv_initialise_color_surface(struct radv_device *device,
            device->physical_device->rad_info.chip_class <= GFX8)
                va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
 
+       unsigned dcc_tile_swizzle = surf->tile_swizzle;
+       dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
+
        cb->cb_dcc_base = va >> 8;
-       cb->cb_dcc_base |= surf->tile_swizzle;
+       cb->cb_dcc_base |= dcc_tile_swizzle;
 
        /* GFX10 field has the same base shift as the GFX6 field. */
        uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;