X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_device.c;h=cb83cde08cfca85eba97679841d853ed730d94a1;hb=c179ded9cb1bc3e42b887c1d3362c86befc3bbcc;hp=b397a9a8aa06b7576b704891e1a66f7b992c307e;hpb=24b1b1f5745d14865d1e1947a8882d5ed58470b4;p=mesa.git diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index b397a9a8aa0..cb83cde08cf 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -354,8 +354,7 @@ radv_physical_device_init(struct radv_physical_device *device, /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs * on GFX6. */ - device->has_clear_state = device->rad_info.chip_class >= GFX7 && - device->rad_info.chip_class <= GFX9; + device->has_clear_state = device->rad_info.chip_class >= GFX7; device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= GFX8; @@ -384,6 +383,23 @@ radv_physical_device_init(struct radv_physical_device *device, device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT; + /* Determine the number of threads per wave for all stages. */ + device->cs_wave_size = 64; + device->ps_wave_size = 64; + device->ge_wave_size = 64; + + if (device->rad_info.chip_class >= GFX10) { + if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32) + device->cs_wave_size = 32; + + /* For pixel shaders, wave64 is recommanded. */ + if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32) + device->ps_wave_size = 32; + + if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32) + device->ge_wave_size = 32; + } + radv_physical_device_init_mem_types(device); radv_fill_device_extension_table(device, &device->supported_extensions); @@ -495,6 +511,9 @@ static const struct debug_control radv_perftest_options[] = { {"bolist", RADV_PERFTEST_BO_LIST}, {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT}, {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK}, + {"cswave32", RADV_PERFTEST_CS_WAVE_32}, + {"pswave32", RADV_PERFTEST_PS_WAVE_32}, + {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {NULL, 0} }; @@ -954,11 +973,8 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: { VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext; - /* TODO: Enable this once the driver supports 64-bit - * compare&swap atomic operations. - */ - features->shaderBufferInt64Atomics = false; - features->shaderSharedInt64Atomics = false; + features->shaderBufferInt64Atomics = HAVE_LLVM >= 0x0900; + features->shaderSharedInt64Atomics = HAVE_LLVM >= 0x0900; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: { @@ -988,6 +1004,12 @@ void radv_GetPhysicalDeviceFeatures2( features->uniformBufferStandardLayout = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { + VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features = + (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext; + features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8; + break; + } default: break; } @@ -1340,10 +1362,7 @@ void radv_GetPhysicalDeviceProperties2( (VkPhysicalDeviceDriverPropertiesKHR *) ext; driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR; - memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR); - strcpy(driver_props->driverName, "radv"); - - memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR); + snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv"); snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (LLVM " MESA_LLVM_VERSION_STRING ")"); @@ -1900,8 +1919,7 @@ VkResult radv_CreateDevice( } } - /* TODO: Enable binning for GFX10. */ - device->pbb_allowed = device->physical_device->rad_info.chip_class == GFX9 && + device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); /* Disabled and not implemented for now. */ @@ -1929,7 +1947,8 @@ VkResult radv_CreateDevice( device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64); - device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); + device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | + S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32); if (device->physical_device->rad_info.chip_class >= GFX7) { /* If the KMD allows it (there is a KMD hw register for it), @@ -2158,16 +2177,14 @@ fill_geom_tess_rings(struct radv_queue *queue, index stride 64 */ desc[0] = esgs_va; desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | - S_008F04_STRIDE(0) | S_008F04_SWIZZLE_ENABLE(true); desc[2] = esgs_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(1) | S_008F0C_INDEX_STRIDE(3) | - S_008F0C_ADD_TID_ENABLE(true); + S_008F0C_ADD_TID_ENABLE(1); if (queue->device->physical_device->rad_info.chip_class >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -2175,24 +2192,20 @@ fill_geom_tess_rings(struct radv_queue *queue, S_008F0C_RESOURCE_LEVEL(1); } else { desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1); } /* GS entry for ES->GS ring */ /* stride 0, num records - size, elsize0, index stride 0 */ desc[4] = esgs_va; - desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32); desc[6] = esgs_ring_size; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (queue->device->physical_device->rad_info.chip_class >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -2213,17 +2226,12 @@ fill_geom_tess_rings(struct radv_queue *queue, /* stride 0, num records - size, elsize0, index stride 0 */ desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32); desc[2] = gsvs_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); if (queue->device->physical_device->rad_info.chip_class >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | @@ -2238,15 +2246,13 @@ fill_geom_tess_rings(struct radv_queue *queue, elsize 4, index stride 16 */ /* shader will patch stride and desc[2] */ desc[4] = gsvs_va; - desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); + desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | + S_008F04_SWIZZLE_ENABLE(1); desc[6] = 0; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_ELEMENT_SIZE(1) | S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true); @@ -2256,7 +2262,8 @@ fill_geom_tess_rings(struct radv_queue *queue, S_008F0C_RESOURCE_LEVEL(1); } else { desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1); } } @@ -2268,9 +2275,7 @@ fill_geom_tess_rings(struct radv_queue *queue, uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset; desc[0] = tess_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32); desc[2] = tess_factor_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | @@ -2287,9 +2292,7 @@ fill_geom_tess_rings(struct radv_queue *queue, } desc[4] = tess_offchip_va; - desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); + desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32); desc[6] = tess_offchip_ring_size; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | @@ -2502,7 +2505,7 @@ radv_emit_global_shader_pointers(struct radv_queue *queue, radv_emit_shader_pointer(queue->device, cs, regs[i], va, true); } - } else if (queue->device->physical_device->rad_info.chip_class >= GFX9) { + } else if (queue->device->physical_device->rad_info.chip_class == GFX9) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, @@ -2753,10 +2756,8 @@ radv_get_preamble_cs(struct radv_queue *queue, radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - if (queue->device->physical_device->rad_info.chip_class < GFX10) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); - } + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); } radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, @@ -4407,15 +4408,15 @@ radv_initialise_color_surface(struct radv_device *device, cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); - cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max; + cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max; cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); if (radv_image_has_fmask(iview->image)) { if (device->physical_device->rad_info.chip_class >= GFX7) - cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1); - cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index); - cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max); + cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1); + cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index); + cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ if (device->physical_device->rad_info.chip_class >= GFX7) @@ -4427,7 +4428,7 @@ radv_initialise_color_surface(struct radv_device *device, /* CMASK variables */ va = radv_buffer_get_va(iview->bo) + iview->image->offset; - va += iview->image->cmask.offset; + va += iview->image->cmask_offset; cb->cb_color_cmask = va >> 8; va = radv_buffer_get_va(iview->bo) + iview->image->offset; @@ -4456,9 +4457,9 @@ radv_initialise_color_surface(struct radv_device *device, } if (radv_image_has_fmask(iview->image)) { - va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask_offset; cb->cb_color_fmask = va >> 8; - cb->cb_color_fmask |= iview->image->fmask.tile_swizzle; + cb->cb_color_fmask |= surf->fmask_tile_swizzle; } else { cb->cb_color_fmask = cb->cb_color_base; } @@ -4508,7 +4509,7 @@ radv_initialise_color_surface(struct radv_device *device, if (radv_image_has_fmask(iview->image)) { cb->cb_color_info |= S_028C70_COMPRESSION(1); if (device->physical_device->rad_info.chip_class == GFX6) { - unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height); + unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); }