#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
-#include "gfx9d.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
- case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
+ case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
+ case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
+ case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
default: chip_string = "AMD RADV unknown"; break;
}
/* Override family and chip_class. */
device->rad_info.family = i;
- if (i >= CHIP_VEGA10)
+ if (i >= CHIP_NAVI10)
+ device->rad_info.chip_class = GFX10;
+ else if (i >= CHIP_VEGA10)
device->rad_info.chip_class = GFX9;
else if (i >= CHIP_TONGA)
device->rad_info.chip_class = GFX8;
(device->rad_info.chip_class >= GFX8 &&
device->rad_info.me_fw_feature >= 41);
+ device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 ||
+ device->rad_info.chip_class >= GFX10;
+
+ device->use_shader_ballot = device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+
radv_physical_device_init_mem_types(device);
radv_fill_device_extension_table(device, &device->supported_extensions);
{"localbos", RADV_PERFTEST_LOCAL_BOS},
{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
{"bolist", RADV_PERFTEST_BO_LIST},
+ {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
+ {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
{NULL, 0}
};
properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
properties->maxTransformFeedbackBufferDataStride = 512;
properties->transformFeedbackQueries = true;
- properties->transformFeedbackStreamsLinesTriangles = false;
+ properties->transformFeedbackStreamsLinesTriangles = true;
properties->transformFeedbackRasterizationStreamSelect = false;
properties->transformFeedbackDraw = true;
break;
props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+ (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+ properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
+ VK_SAMPLE_COUNT_4_BIT |
+ VK_SAMPLE_COUNT_8_BIT;
+ properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
+ properties->sampleLocationCoordinateRange[0] = 0.0f;
+ properties->sampleLocationCoordinateRange[1] = 0.9375f;
+ properties->sampleLocationSubPixelBits = 4;
+ properties->variableSampleLocations = VK_FALSE;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
+ VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
+ (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
+
+ /* We support all of the depth resolve modes */
+ properties->supportedDepthResolveModes =
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
+ VK_RESOLVE_MODE_MIN_BIT_KHR |
+ VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ /* Average doesn't make sense for stencil so we don't support that */
+ properties->supportedStencilResolveModes =
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+ VK_RESOLVE_MODE_MIN_BIT_KHR |
+ VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ properties->independentResolveNone = VK_TRUE;
+ properties->independentResolve = VK_TRUE;
+ break;
+ }
default:
break;
}
* Note that the application heap usages are not really accurate (eg.
* in presence of shared buffers).
*/
- if (vram_size) {
- heap_usage = device->ws->query_value(device->ws,
- RADEON_ALLOCATED_VRAM);
+ for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
+ uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;
- heap_budget = vram_size -
- device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
- heap_usage;
+ switch (device->mem_type_indices[i]) {
+ case RADV_MEM_TYPE_VRAM:
+ heap_usage = device->ws->query_value(device->ws,
+ RADEON_ALLOCATED_VRAM);
- memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget;
- memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage;
- }
+ heap_budget = vram_size -
+ device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
+ heap_usage;
- if (visible_vram_size) {
- heap_usage = device->ws->query_value(device->ws,
- RADEON_ALLOCATED_VRAM_VIS);
-
- heap_budget = visible_vram_size -
- device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
- heap_usage;
+ memoryBudget->heapBudget[heap_index] = heap_budget;
+ memoryBudget->heapUsage[heap_index] = heap_usage;
+ break;
+ case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
+ heap_usage = device->ws->query_value(device->ws,
+ RADEON_ALLOCATED_VRAM_VIS);
- memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget;
- memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage;
- }
+ heap_budget = visible_vram_size -
+ device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
+ heap_usage;
- if (gtt_size) {
- heap_usage = device->ws->query_value(device->ws,
- RADEON_ALLOCATED_GTT);
+ memoryBudget->heapBudget[heap_index] = heap_budget;
+ memoryBudget->heapUsage[heap_index] = heap_usage;
+ break;
+ case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
+ heap_usage = device->ws->query_value(device->ws,
+ RADEON_ALLOCATED_GTT);
- heap_budget = gtt_size -
- device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
- heap_usage;
+ heap_budget = gtt_size -
+ device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
+ heap_usage;
- memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget;
- memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage;
+ memoryBudget->heapBudget[heap_index] = heap_budget;
+ memoryBudget->heapUsage[heap_index] = heap_usage;
+ break;
+ default:
+ break;
+ }
}
/* The heapBudget and heapUsage values must be zero for array elements
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
S_008F0C_ELEMENT_SIZE(1) |
S_008F0C_INDEX_STRIDE(3) |
S_008F0C_ADD_TID_ENABLE(true);
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(2) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
/* GS entry for ES->GS ring */
/* stride 0, num records - size, elsize0,
index stride 0 */
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
S_008F0C_ELEMENT_SIZE(0) |
S_008F0C_INDEX_STRIDE(0) |
S_008F0C_ADD_TID_ENABLE(false);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(2) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
}
desc += 8;
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
S_008F0C_ELEMENT_SIZE(0) |
S_008F0C_INDEX_STRIDE(0) |
S_008F0C_ADD_TID_ENABLE(false);
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(2) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
/* stride gsvs_itemsize, num records 64
elsize 4, index stride 16 */
/* shader will patch stride and desc[2] */
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
S_008F0C_ELEMENT_SIZE(1) |
S_008F0C_INDEX_STRIDE(1) |
S_008F0C_ADD_TID_ENABLE(true);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(2) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
}
desc += 8;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(0) |
- S_008F0C_INDEX_STRIDE(0) |
- S_008F0C_ADD_TID_ENABLE(false);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
desc[4] = tess_offchip_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(0) |
- S_008F0C_INDEX_STRIDE(0) |
- S_008F0C_ADD_TID_ENABLE(false);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
}
desc += 8;
S_030938_SIZE(tf_ring_size / 4));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
tf_va >> 8);
- if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
+ S_030984_BASE_HI(tf_va >> 40));
+ } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
S_030944_BASE_HI(tf_va >> 40));
}
radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
- if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i],
+ va, true);
+ }
+ } else if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
queue->device->physical_device->rad_info.chip_class >= GFX7,
(queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_INV_SCACHE |
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
queue->queue_family_index == RING_COMPUTE &&
queue->device->physical_device->rad_info.chip_class >= GFX7,
RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_INV_SCACHE |
+ RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
}
unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
unsigned max_compressed_block_size;
+ unsigned independent_128b_blocks;
unsigned independent_64b_blocks;
- if (!radv_image_has_dcc(iview->image))
+ if (!radv_dcc_enabled(iview->image, iview->base_mip))
return 0;
- if (iview->image->info.samples > 1) {
- if (iview->image->planes[0].surface.bpe == 1)
- max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- else if (iview->image->planes[0].surface.bpe == 2)
- max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
- }
-
if (!device->physical_device->rad_info.has_dedicated_vram) {
/* amdvlk: [min-compressed-block-size] should be set to 32 for
* dGPU and 64 for APU because all of our APUs to date use
min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
}
- if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
- /* If this DCC image is potentially going to be used in texture
- * fetches, we need some special settings.
- */
- independent_64b_blocks = 1;
- max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- } else {
- /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
- * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
- * big as possible for better compression state.
- */
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
independent_64b_blocks = 0;
- max_compressed_block_size = max_uncompressed_block_size;
+ independent_128b_blocks = 1;
+ } else {
+ independent_128b_blocks = 0;
+
+ if (iview->image->info.samples > 1) {
+ if (iview->image->planes[0].surface.bpe == 1)
+ max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ else if (iview->image->planes[0].surface.bpe == 2)
+ max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
+ }
+
+ if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ /* If this DCC image is potentially going to be used in texture
+ * fetches, we need some special settings.
+ */
+ independent_64b_blocks = 1;
+ max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ } else {
+ /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
+ * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
+ * big as possible for better compression state.
+ */
+ independent_64b_blocks = 0;
+ max_compressed_block_size = max_uncompressed_block_size;
+ }
}
return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
- S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
+ S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
+ S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}
static void
else
meta = surf->u.gfx9.cmask;
- cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
- S_028C74_RB_ALIGNED(meta.rb_aligned) |
- S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
+ S_028EE0_CMASK_PIPE_ALIGNED(surf->u.gfx9.cmask.pipe_aligned) |
+ S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
+ } else {
+ cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
+ S_028C74_RB_ALIGNED(meta.rb_aligned) |
+ S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+ cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
+ }
cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
cb->cb_color_base |= surf->tile_swizzle;
-
- cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
} else {
const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
va = radv_buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip) &&
+ device->physical_device->rad_info.chip_class <= GFX8)
+ va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
+
+ unsigned dcc_tile_swizzle = surf->tile_swizzle;
+ dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
+
cb->cb_dcc_base = va >> 8;
- cb->cb_dcc_base |= surf->tile_swizzle;
+ cb->cb_dcc_base |= dcc_tile_swizzle;
+ /* GFX10 field has the same base shift as the GFX6 field. */
uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
- S_028C6C_SLICE_MAX(max_slice);
+ S_028C6C_SLICE_MAX_GFX10(max_slice);
if (iview->image->info.samples > 1) {
unsigned log_samples = util_logbase2(iview->image->info.samples);
unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
}
+
+ if (radv_image_is_tc_compat_cmask(iview->image)) {
+ /* Allow the texture block to read FMASK directly
+ * without decompressing it. This bit must be cleared
+ * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
+ * otherwise the operation doesn't happen.
+ */
+ cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
+
+ /* Set CMASK into a tiling format that allows the
+ * texture block to read it.
+ */
+ cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
+ }
}
if (radv_image_has_cmask(iview->image) &&
unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);
- cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
- cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
- S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
+
+ cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
+ S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
+ S_028EE0_RESOURCE_LEVEL(1);
+ } else {
+ cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
+ cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
+ S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
+ }
+
cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
S_028C68_MIP0_HEIGHT(height - 1) |
S_028C68_MAX_MIP(iview->image->info.levels - 1);
uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
S_028008_SLICE_MAX(max_slice);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
+ S_028008_SLICE_MAX_HI(max_slice >> 11);
+ }
ds->db_htile_data_base = 0;
ds->db_htile_surface = 0;
ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
- ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
- ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
- ds->db_depth_view |= S_028008_MIPID(level);
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
+ ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
+ }
+ ds->db_depth_view |= S_028008_MIPID(level);
ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
S_02801C_Y_MAX(iview->image->info.height - 1);
unsigned max_zplanes =
radv_calc_decompress_on_z_planes(device, iview);
- ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
- S_028038_ITERATE_FLUSH(1);
- ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+ ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
+ } else {
+ ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+ }
}
if (!surf->has_stencil)
iview->image->htile_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
- S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned) |
- S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
+ S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ ds->db_htile_surface |= S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
+ }
}
} else {
const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
{
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
- bool is_vi = (device->physical_device->rad_info.chip_class >= GFX8);
+ bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
+ device->physical_device->rad_info.chip_class == GFX9;
unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
S_008F30_ANISO_BIAS(max_aniso_ratio) |
S_008F30_DISABLE_CUBE_WRAP(0) |
- S_008F30_COMPAT_MODE(is_vi) |
+ S_008F30_COMPAT_MODE(compat_mode) |
S_008F30_FILTER_MODE(filter_mode));
sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
- S_008F38_MIP_POINT_PRECLAMP(0) |
- S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
- S_008F38_FILTER_PREC_FIX(1) |
- S_008F38_ANISO_OVERRIDE(is_vi));
+ S_008F38_MIP_POINT_PRECLAMP(0));
sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
+ } else {
+ sampler->state[2] |=
+ S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
+ S_008F38_FILTER_PREC_FIX(1) |
+ S_008F38_ANISO_OVERRIDE_GFX6(device->physical_device->rad_info.chip_class >= GFX8);
+ }
}
VkResult radv_CreateSampler(
return VK_SUCCESS;
}
+
+/* Report the maximum sample-location grid size for the given sample count. */
+void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
+	VkPhysicalDevice                            physicalDevice,
+	VkSampleCountFlagBits                       samples,
+	VkMultisamplePropertiesEXT*                 pMultisampleProperties)
+{
+	/* 2x, 4x and 8x get a 2x2 grid; every other sample count gets 0x0. */
+	const uint32_t grid_dim = (samples & (VK_SAMPLE_COUNT_2_BIT |
+					      VK_SAMPLE_COUNT_4_BIT |
+					      VK_SAMPLE_COUNT_8_BIT)) ? 2 : 0;
+
+	pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ grid_dim, grid_dim };
+}