freedreno/gmem: small cleanup
[mesa.git] / src / amd / vulkan / radv_device.c
index 1f956e599541407770d63383f04e33f3b1d2166d..cb83cde08cfca85eba97679841d853ed730d94a1 100644 (file)
@@ -362,6 +362,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
                                  device->rad_info.family == CHIP_RAVEN;
 
+       device->has_tc_compat_zrange_bug = device->rad_info.chip_class < GFX10;
+
        /* Out-of-order primitive rasterization. */
        device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 &&
                                        device->rad_info.max_se >= 2;
@@ -376,10 +378,28 @@ radv_physical_device_init(struct radv_physical_device *device,
                                       (device->rad_info.chip_class >= GFX8 &&
                                        device->rad_info.me_fw_feature >= 41);
 
-       device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2;
+       device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 ||
+                                         device->rad_info.chip_class >= GFX10;
 
        device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
 
+       /* Determine the number of threads per wave for all stages. */
+       device->cs_wave_size = 64;
+       device->ps_wave_size = 64;
+       device->ge_wave_size = 64;
+
+       if (device->rad_info.chip_class >= GFX10) {
+               if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
+                       device->cs_wave_size = 32;
+
+               /* For pixel shaders, wave64 is recommanded. */
+               if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
+                       device->ps_wave_size = 32;
+
+               if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
+                       device->ge_wave_size = 32;
+       }
+
        radv_physical_device_init_mem_types(device);
        radv_fill_device_extension_table(device, &device->supported_extensions);
 
@@ -472,6 +492,7 @@ static const struct debug_control radv_debug_options[] = {
        {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
        {"nobinning", RADV_DEBUG_NOBINNING},
        {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
+       {"nongg", RADV_DEBUG_NO_NGG},
        {NULL, 0}
 };
 
@@ -490,6 +511,9 @@ static const struct debug_control radv_perftest_options[] = {
        {"bolist", RADV_PERFTEST_BO_LIST},
        {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
        {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
+       {"cswave32", RADV_PERFTEST_CS_WAVE_32},
+       {"pswave32", RADV_PERFTEST_PS_WAVE_32},
+       {"gewave32", RADV_PERFTEST_GE_WAVE_32},
        {NULL, 0}
 };
 
@@ -949,11 +973,8 @@ void radv_GetPhysicalDeviceFeatures2(
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
                        VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
                                (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
-                       /* TODO: Enable this once the driver supports 64-bit
-                        * compare&swap atomic operations.
-                        */
-                       features->shaderBufferInt64Atomics = false;
-                       features->shaderSharedInt64Atomics = false;
+                       features->shaderBufferInt64Atomics = HAVE_LLVM >= 0x0900;
+                       features->shaderSharedInt64Atomics = HAVE_LLVM >= 0x0900;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
@@ -983,6 +1004,12 @@ void radv_GetPhysicalDeviceFeatures2(
                        features->uniformBufferStandardLayout = true;
                        break;
                }
+               case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+                       VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+                               (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+                       features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
+                       break;
+               }
                default:
                        break;
                }
@@ -1335,10 +1362,7 @@ void radv_GetPhysicalDeviceProperties2(
                                (VkPhysicalDeviceDriverPropertiesKHR *) ext;
 
                        driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
-                       memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
-                       strcpy(driver_props->driverName, "radv");
-
-                       memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
+                       snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv");
                        snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
                                "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
                                " (LLVM " MESA_LLVM_VERSION_STRING ")");
@@ -1923,7 +1947,8 @@ VkResult radv_CreateDevice(
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
 
-       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
+       device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
+                                    S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32);
 
        if (device->physical_device->rad_info.chip_class >= GFX7) {
                /* If the KMD allows it (there is a KMD hw register for it),
@@ -2152,16 +2177,14 @@ fill_geom_tess_rings(struct radv_queue *queue,
                   index stride 64 */
                desc[0] = esgs_va;
                desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
-                         S_008F04_STRIDE(0) |
                          S_008F04_SWIZZLE_ENABLE(true);
                desc[2] = esgs_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_ELEMENT_SIZE(1) |
                          S_008F0C_INDEX_STRIDE(3) |
-                         S_008F0C_ADD_TID_ENABLE(true);
+                         S_008F0C_ADD_TID_ENABLE(1);
 
                if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
                        desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
@@ -2169,24 +2192,20 @@ fill_geom_tess_rings(struct radv_queue *queue,
                                   S_008F0C_RESOURCE_LEVEL(1);
                } else {
                        desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                  S_008F0C_ELEMENT_SIZE(1);
                }
 
                /* GS entry for ES->GS ring */
                /* stride 0, num records - size, elsize0,
                   index stride 0 */
                desc[4] = esgs_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
                desc[6] = esgs_ring_size;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
                if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
                        desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
@@ -2207,17 +2226,12 @@ fill_geom_tess_rings(struct radv_queue *queue,
                /* stride 0, num records - size, elsize0,
                   index stride 0 */
                desc[0] = gsvs_va;
-               desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
                desc[2] = gsvs_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_ELEMENT_SIZE(0) |
-                         S_008F0C_INDEX_STRIDE(0) |
-                         S_008F0C_ADD_TID_ENABLE(false);
+                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
                if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
                        desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
@@ -2232,15 +2246,13 @@ fill_geom_tess_rings(struct radv_queue *queue,
                   elsize 4, index stride 16 */
                /* shader will patch stride and desc[2] */
                desc[4] = gsvs_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(true);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
+                         S_008F04_SWIZZLE_ENABLE(1);
                desc[6] = 0;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                          S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                          S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                         S_008F0C_ELEMENT_SIZE(1) |
                          S_008F0C_INDEX_STRIDE(1) |
                          S_008F0C_ADD_TID_ENABLE(true);
 
@@ -2250,7 +2262,8 @@ fill_geom_tess_rings(struct radv_queue *queue,
                                   S_008F0C_RESOURCE_LEVEL(1);
                } else {
                        desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+                                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                                  S_008F0C_ELEMENT_SIZE(1);
                }
 
        }
@@ -2262,9 +2275,7 @@ fill_geom_tess_rings(struct radv_queue *queue,
                uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
 
                desc[0] = tess_va;
-               desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
                desc[2] = tess_factor_ring_size;
                desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
@@ -2281,9 +2292,7 @@ fill_geom_tess_rings(struct radv_queue *queue,
                }
 
                desc[4] = tess_offchip_va;
-               desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
-                         S_008F04_STRIDE(0) |
-                         S_008F04_SWIZZLE_ENABLE(false);
+               desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
                desc[6] = tess_offchip_ring_size;
                desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                          S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
@@ -2337,9 +2346,11 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
         *
         * Follow AMDVLK here.
         */
-       if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
-           device->physical_device->rad_info.chip_class == GFX7 ||
-           device->physical_device->rad_info.chip_class == GFX6)
+       if (device->physical_device->rad_info.chip_class >= GFX10) {
+               max_offchip_buffers_per_se = 256;
+       } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+                  device->physical_device->rad_info.chip_class == GFX7 ||
+                  device->physical_device->rad_info.chip_class == GFX6)
                --max_offchip_buffers_per_se;
 
        max_offchip_buffers = max_offchip_buffers_per_se *
@@ -2363,9 +2374,12 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
        case GFX7:
        case GFX8:
        case GFX9:
-       default:
                max_offchip_buffers = MIN2(max_offchip_buffers, 508);
                break;
+       case GFX10:
+               break;
+       default:
+               break;
        }
 
        *max_offchip_buffers_p = max_offchip_buffers;
@@ -2484,14 +2498,14 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
        if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
-                                  R_00B230_SPI_SHADER_USER_DATA_GS_0,
-                                  R_00B430_SPI_SHADER_USER_DATA_HS_0};
+                                  R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+                                  R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
 
                for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
                        radv_emit_shader_pointer(queue->device, cs, regs[i],
                                                 va, true);
                }
-       } else if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+       } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
                                   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
@@ -2741,6 +2755,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
                }
@@ -4393,15 +4408,15 @@ radv_initialise_color_surface(struct radv_device *device,
 
                cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
                cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
-               cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
+               cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
 
                cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
 
                if (radv_image_has_fmask(iview->image)) {
                        if (device->physical_device->rad_info.chip_class >= GFX7)
-                               cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
-                       cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
-                       cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
+                               cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
+                       cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
+                       cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
                } else {
                        /* This must be set for fast clear to work without FMASK. */
                        if (device->physical_device->rad_info.chip_class >= GFX7)
@@ -4413,7 +4428,7 @@ radv_initialise_color_surface(struct radv_device *device,
 
        /* CMASK variables */
        va = radv_buffer_get_va(iview->bo) + iview->image->offset;
-       va += iview->image->cmask.offset;
+       va += iview->image->cmask_offset;
        cb->cb_color_cmask = va >> 8;
 
        va = radv_buffer_get_va(iview->bo) + iview->image->offset;
@@ -4423,8 +4438,11 @@ radv_initialise_color_surface(struct radv_device *device,
            device->physical_device->rad_info.chip_class <= GFX8)
                va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
 
+       unsigned dcc_tile_swizzle = surf->tile_swizzle;
+       dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
+
        cb->cb_dcc_base = va >> 8;
-       cb->cb_dcc_base |= surf->tile_swizzle;
+       cb->cb_dcc_base |= dcc_tile_swizzle;
 
        /* GFX10 field has the same base shift as the GFX6 field. */
        uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
@@ -4439,9 +4457,9 @@ radv_initialise_color_surface(struct radv_device *device,
        }
 
        if (radv_image_has_fmask(iview->image)) {
-               va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
+               va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask_offset;
                cb->cb_color_fmask = va >> 8;
-               cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
+               cb->cb_color_fmask |= surf->fmask_tile_swizzle;
        } else {
                cb->cb_color_fmask = cb->cb_color_base;
        }
@@ -4491,7 +4509,7 @@ radv_initialise_color_surface(struct radv_device *device,
        if (radv_image_has_fmask(iview->image)) {
                cb->cb_color_info |= S_028C70_COMPRESSION(1);
                if (device->physical_device->rad_info.chip_class == GFX6) {
-                       unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
+                       unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
                        cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
                }