X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_device.c;h=cb83cde08cfca85eba97679841d853ed730d94a1;hb=c179ded9cb1bc3e42b887c1d3362c86befc3bbcc;hp=f35d6ec4c3219ab48c908e0234593799511129fa;hpb=c90f46700dd2739ed9abb1246880d4829fdb3252;p=mesa.git diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index f35d6ec4c32..cb83cde08cf 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -362,6 +362,8 @@ radv_physical_device_init(struct radv_physical_device *device, device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 || device->rad_info.family == CHIP_RAVEN; + device->has_tc_compat_zrange_bug = device->rad_info.chip_class < GFX10; + /* Out-of-order primitive rasterization. */ device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 && device->rad_info.max_se >= 2; @@ -376,10 +378,28 @@ radv_physical_device_init(struct radv_physical_device *device, (device->rad_info.chip_class >= GFX8 && device->rad_info.me_fw_feature >= 41); - device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2; + device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 || + device->rad_info.chip_class >= GFX10; device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT; + /* Determine the number of threads per wave for all stages. */ + device->cs_wave_size = 64; + device->ps_wave_size = 64; + device->ge_wave_size = 64; + + if (device->rad_info.chip_class >= GFX10) { + if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32) + device->cs_wave_size = 32; + + /* For pixel shaders, wave64 is recommended. 
*/ + if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32) + device->ps_wave_size = 32; + + if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32) + device->ge_wave_size = 32; + } + radv_physical_device_init_mem_types(device); radv_fill_device_extension_table(device, &device->supported_extensions); @@ -472,6 +492,7 @@ static const struct debug_control radv_debug_options[] = { {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, {"nobinning", RADV_DEBUG_NOBINNING}, {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT}, + {"nongg", RADV_DEBUG_NO_NGG}, {NULL, 0} }; @@ -490,6 +511,9 @@ static const struct debug_control radv_perftest_options[] = { {"bolist", RADV_PERFTEST_BO_LIST}, {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT}, {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK}, + {"cswave32", RADV_PERFTEST_CS_WAVE_32}, + {"pswave32", RADV_PERFTEST_PS_WAVE_32}, + {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {NULL, 0} }; @@ -949,11 +973,8 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: { VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext; - /* TODO: Enable this once the driver supports 64-bit - * compare&swap atomic operations. 
- */ - features->shaderBufferInt64Atomics = false; - features->shaderSharedInt64Atomics = false; + features->shaderBufferInt64Atomics = HAVE_LLVM >= 0x0900; + features->shaderSharedInt64Atomics = HAVE_LLVM >= 0x0900; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: { @@ -983,6 +1004,12 @@ void radv_GetPhysicalDeviceFeatures2( features->uniformBufferStandardLayout = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { + VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features = + (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext; + features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8; + break; + } default: break; } @@ -1335,10 +1362,7 @@ void radv_GetPhysicalDeviceProperties2( (VkPhysicalDeviceDriverPropertiesKHR *) ext; driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR; - memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR); - strcpy(driver_props->driverName, "radv"); - - memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR); + snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv"); snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (LLVM " MESA_LLVM_VERSION_STRING ")"); @@ -1923,7 +1947,8 @@ VkResult radv_CreateDevice( device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64); - device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); + device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | + S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32); if (device->physical_device->rad_info.chip_class >= GFX7) { /* If the KMD allows it (there is a KMD hw register for it), @@ -2152,16 +2177,14 @@ fill_geom_tess_rings(struct radv_queue *queue, index stride 64 */ desc[0] = esgs_va; desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | - S_008F04_STRIDE(0) | S_008F04_SWIZZLE_ENABLE(true); desc[2] = esgs_ring_size; desc[3] = 
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(1) | S_008F0C_INDEX_STRIDE(3) | - S_008F0C_ADD_TID_ENABLE(true); + S_008F0C_ADD_TID_ENABLE(1); if (queue->device->physical_device->rad_info.chip_class >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -2169,24 +2192,20 @@ fill_geom_tess_rings(struct radv_queue *queue, S_008F0C_RESOURCE_LEVEL(1); } else { desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1); } /* GS entry for ES->GS ring */ /* stride 0, num records - size, elsize0, index stride 0 */ desc[4] = esgs_va; - desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32); desc[6] = esgs_ring_size; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (queue->device->physical_device->rad_info.chip_class >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -2207,17 +2226,12 @@ fill_geom_tess_rings(struct radv_queue *queue, /* stride 0, num records - size, elsize0, index stride 0 */ desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32); desc[2] = gsvs_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(0) | - 
S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (queue->device->physical_device->rad_info.chip_class >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -2232,15 +2246,13 @@ fill_geom_tess_rings(struct radv_queue *queue, elsize 4, index stride 16 */ /* shader will patch stride and desc[2] */ desc[4] = gsvs_va; - desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); + desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | + S_008F04_SWIZZLE_ENABLE(1); desc[6] = 0; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(1) | S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true); @@ -2250,7 +2262,8 @@ fill_geom_tess_rings(struct radv_queue *queue, S_008F0C_RESOURCE_LEVEL(1); } else { desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1); } } @@ -2262,9 +2275,7 @@ fill_geom_tess_rings(struct radv_queue *queue, uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset; desc[0] = tess_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32); desc[2] = tess_factor_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | @@ -2281,9 +2292,7 @@ fill_geom_tess_rings(struct radv_queue *queue, } desc[4] = tess_offchip_va; - desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32); desc[6] = tess_offchip_ring_size; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | @@ -2337,9 +2346,11 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff * * Follow AMDVLK here. */ - if (device->physical_device->rad_info.family == CHIP_VEGA10 || - device->physical_device->rad_info.chip_class == GFX7 || - device->physical_device->rad_info.chip_class == GFX6) + if (device->physical_device->rad_info.chip_class >= GFX10) { + max_offchip_buffers_per_se = 256; + } else if (device->physical_device->rad_info.family == CHIP_VEGA10 || + device->physical_device->rad_info.chip_class == GFX7 || + device->physical_device->rad_info.chip_class == GFX6) --max_offchip_buffers_per_se; max_offchip_buffers = max_offchip_buffers_per_se * @@ -2363,9 +2374,12 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff case GFX7: case GFX8: case GFX9: - default: max_offchip_buffers = MIN2(max_offchip_buffers, 508); break; + case GFX10: + break; + default: + break; } *max_offchip_buffers_p = max_offchip_buffers; @@ -2484,14 +2498,14 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, if (queue->device->physical_device->rad_info.chip_class >= GFX10) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, - R_00B230_SPI_SHADER_USER_DATA_GS_0, - R_00B430_SPI_SHADER_USER_DATA_HS_0}; + R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, + R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { radv_emit_shader_pointer(queue->device, cs, regs[i], va, true); } - } else if (queue->device->physical_device->rad_info.chip_class >= GFX9) { + } else if (queue->device->physical_device->rad_info.chip_class == GFX9) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, @@ -2741,6 +2755,7 @@ radv_get_preamble_cs(struct radv_queue *queue, if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); } @@ -4393,15 +4408,15 @@ radv_initialise_color_surface(struct radv_device *device, cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); - cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max; + cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max; cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); if (radv_image_has_fmask(iview->image)) { if (device->physical_device->rad_info.chip_class >= GFX7) - cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1); - cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index); - cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max); + cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1); + cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index); + cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. 
*/ if (device->physical_device->rad_info.chip_class >= GFX7) @@ -4413,7 +4428,7 @@ radv_initialise_color_surface(struct radv_device *device, /* CMASK variables */ va = radv_buffer_get_va(iview->bo) + iview->image->offset; - va += iview->image->cmask.offset; + va += iview->image->cmask_offset; cb->cb_color_cmask = va >> 8; va = radv_buffer_get_va(iview->bo) + iview->image->offset; @@ -4442,9 +4457,9 @@ radv_initialise_color_surface(struct radv_device *device, } if (radv_image_has_fmask(iview->image)) { - va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask_offset; cb->cb_color_fmask = va >> 8; - cb->cb_color_fmask |= iview->image->fmask.tile_swizzle; + cb->cb_color_fmask |= surf->fmask_tile_swizzle; } else { cb->cb_color_fmask = cb->cb_color_base; } @@ -4494,7 +4509,7 @@ radv_initialise_color_surface(struct radv_device *device, if (radv_image_has_fmask(iview->image)) { cb->cb_color_info |= S_028C70_COMPRESSION(1); if (device->physical_device->rad_info.chip_class == GFX6) { - unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height); + unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); }