};
static const VkExtensionProperties common_device_extensions[] = {
+ {
+ .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
+ .specVersion = 1,
+ },
{
.extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
.specVersion = 1,
.extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
.specVersion = 1,
},
+ {
+ .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
+ .specVersion = 1,
+ },
{
.extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
.specVersion = 1,
instance->physicalDeviceCount = 0;
- max_devices = drmGetDevices2(0, devices, sizeof(devices));
+ max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
if (max_devices < 1)
return VK_ERROR_INCOMPATIBLE_DRIVER;
if (result == VK_SUCCESS)
++instance->physicalDeviceCount;
else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
- return result;
+ break;
}
}
+ drmFreeDevices(devices, max_devices);
+
return result;
}
.textureCompressionASTC_LDR = false,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = false,
+ .pipelineStatisticsQuery = true,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = true,
.shaderInt64 = false,
.shaderInt16 = false,
.sparseBinding = true,
- .variableMultisampleRate = false,
- .inheritedQueries = false,
+ .variableMultisampleRate = true,
+ .inheritedQueries = true,
};
}
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
VkSampleCountFlags sample_counts = 0xf;
+
+ /* make sure that the entire descriptor set is addressable with a signed
+ * 32-bit int. So the sum of all limits scaled by descriptor size has to
+ * be at most 2 GiB. the combined image & samples object count as one of
+ * both. This limit is for the pipeline layout, not for the set layout, but
+ * there is no set limit, so we just set a pipeline limit. I don't think
+ * any app is going to hit this soon. */
+ size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
+ (32 /* uniform buffer, 32 due to potential space wasted on alignement */ +
+ 32 /* storage buffer, 32 due to potential space wasted on alignement */ +
+ 32 /* sampler, largest when combined with image */ +
+ 64 /* sampled image */ +
+ 64 /* storage image */);
+
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = (1 << 14),
.maxImageDimension2D = (1 << 14),
.bufferImageGranularity = 64, /* A cache line */
.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
.maxBoundDescriptorSets = MAX_SETS,
- .maxPerStageDescriptorSamplers = (1u << 31) / 16,
- .maxPerStageDescriptorUniformBuffers = (1u << 31) / 16,
- .maxPerStageDescriptorStorageBuffers = (1u << 31) / 16,
- .maxPerStageDescriptorSampledImages = (1u << 31) / 96,
- .maxPerStageDescriptorStorageImages = (1u << 31) / 64,
- .maxPerStageDescriptorInputAttachments = (1u << 31) / 64,
- .maxPerStageResources = (1u << 31) / 32,
- .maxDescriptorSetSamplers = 256,
- .maxDescriptorSetUniformBuffers = (1u << 31) / 16,
- .maxDescriptorSetUniformBuffersDynamic = 8,
- .maxDescriptorSetStorageBuffers = (1u << 31) / 16,
- .maxDescriptorSetStorageBuffersDynamic = 8,
- .maxDescriptorSetSampledImages = (1u << 31) / 96,
- .maxDescriptorSetStorageImages = (1u << 31) / 64,
- .maxDescriptorSetInputAttachments = (1u << 31) / 64,
+ .maxPerStageDescriptorSamplers = max_descriptor_set_size,
+ .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
+ .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
+ .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
+ .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
+ .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
+ .maxPerStageResources = max_descriptor_set_size,
+ .maxDescriptorSetSamplers = max_descriptor_set_size,
+ .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
+ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
+ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetSampledImages = max_descriptor_set_size,
+ .maxDescriptorSetStorageImages = max_descriptor_set_size,
+ .maxDescriptorSetInputAttachments = max_descriptor_set_size,
.maxVertexInputAttributes = 32,
.maxVertexInputBindings = 32,
.maxVertexInputAttributeOffset = 2047,
.maxTessellationControlTotalOutputComponents = 4096,
.maxTessellationEvaluationInputComponents = 128,
.maxTessellationEvaluationOutputComponents = 128,
- .maxGeometryShaderInvocations = 32,
+ .maxGeometryShaderInvocations = 127,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxGeometryOutputVertices = 256,
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
- .timestampComputeAndGraphics = false,
- .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
+ .timestampComputeAndGraphics = true,
+ .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
.maxClipDistances = 8,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.deviceID = pdevice->rad_info.pci_id,
.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
.limits = limits,
- .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
+ .sparseProperties = {0},
};
strcpy(pProperties->deviceName, pdevice->name);
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2KHR *pProperties)
{
- return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
+ radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
+
+ vk_foreach_struct(ext, pProperties->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
+ VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
+ (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
+ properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
+ break;
+ }
+ default:
+ break;
+ }
+ }
}
static void radv_get_physical_device_queue_family_properties(
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
device->gs_table_depth = 32;
return;
default:
break;
}
device->ws->cs_finalize(device->flush_cs[family]);
+
+ device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ case RADV_QUEUE_COMPUTE:
+ si_cs_emit_cache_flush(device->flush_shader_cs[family],
+ device->physical_device->rad_info.chip_class,
+ family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
+ family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
+ RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_GLOBAL_L2);
+ break;
+ }
+ device->ws->cs_finalize(device->flush_shader_cs[family]);
}
if (getenv("RADV_TRACE_FILE")) {
device->ws->cs_destroy(device->empty_cs[i]);
if (device->flush_cs[i])
device->ws->cs_destroy(device->flush_cs[i]);
+ if (device->flush_shader_cs[i])
+ device->ws->cs_destroy(device->flush_shader_cs[i]);
}
radv_device_finish_meta(device);
for (uint32_t i = 0; i < submitCount; i++) {
struct radeon_winsys_cs **cs_array;
- bool do_flush = !i;
+ bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
bool can_patch = !do_flush;
uint32_t advance;
(pSubmits[i].commandBufferCount + do_flush));
if(do_flush)
- cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
+ cs_array[0] = pSubmits[i].waitSemaphoreCount ?
+ queue->device->flush_shader_cs[queue->queue_family_index] :
+ queue->device->flush_cs[queue->queue_family_index];
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
S_028C74_TILE_MODE_INDEX(tile_mode_index);
- if (iview->image->samples > 1) {
- unsigned log_samples = util_logbase2(iview->image->samples);
+ if (iview->image->info.samples > 1) {
+ unsigned log_samples = util_logbase2(iview->image->info.samples);
cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
S_028C74_NUM_FRAGMENTS(log_samples);
format != V_028C70_COLOR_24_8) |
S_028C70_NUMBER_TYPE(ntype) |
S_028C70_ENDIAN(endian);
- if (iview->image->samples > 1)
+ if (iview->image->info.samples > 1)
if (iview->image->fmask.size)
cb->cb_color_info |= S_028C70_COMPRESSION(1);
if (device->physical_device->rad_info.chip_class >= VI) {
unsigned max_uncompressed_block_size = 2;
- if (iview->image->samples > 1) {
+ if (iview->image->info.samples > 1) {
if (iview->image->surface.bpe == 1)
max_uncompressed_block_size = 0;
else if (iview->image->surface.bpe == 2)
unsigned format;
uint64_t va, s_offs, z_offs;
const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
+ bool stencil_only = false;
memset(ds, 0, sizeof(*ds));
switch (iview->vk_format) {
case VK_FORMAT_D24_UNORM_S8_UINT:
S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
ds->offset_scale = 1.0f;
break;
+ case VK_FORMAT_S8_UINT:
+ stencil_only = true;
+ level_info = &iview->image->surface.stencil_level[level];
+ break;
default:
break;
}
ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
- if (iview->image->samples > 1)
- ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
+ if (iview->image->info.samples > 1)
+ ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
+ if (stencil_only)
+ tile_mode = stencil_tile_mode;
+
ds->db_depth_info |=
S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
}
if (iview->image->surface.htile_size && !level) {
- ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
- S_028040_ALLOW_EXPCLEAR(1);
-
- if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
- /* Workaround: For a not yet understood reason, the
- * combination of MSAA, fast stencil clear and stencil
- * decompress messes with subsequent stencil buffer
- * uses. Problem was reproduced on Verde, Bonaire,
- * Tonga, and Carrizo.
- *
- * Disabling EXPCLEAR works around the problem.
- *
- * Check piglit's arb_texture_multisample-stencil-clear
- * test if you want to try changing this.
- */
- if (iview->image->samples <= 1)
- ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
- } else
+ ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+
+ if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
/* Use all of the htile_buffer for depth if there's no stencil. */
ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);