#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
-#include <llvm/Config/llvm-config.h>
#include "radv_debug.h"
#include "radv_private.h"
device->use_ngg = device->rad_info.chip_class >= GFX10 &&
device->rad_info.family != CHIP_NAVI14 &&
!(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
- if (device->use_aco && device->use_ngg) {
- fprintf(stderr, "WARNING: disabling NGG because ACO is used.\n");
- device->use_ngg = false;
- }
device->use_ngg_streamout = false;
* uninitialized data in an indirect draw.
*/
instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(name, "No Man's Sky")) {
+ /* Work around a NMS game bug */
+ instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
}
}
instance->enabled_extensions.extensions[index] = true;
}
+ bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
+ /* Vulkan requires that entrypoints for extensions which have
+ * not been enabled must not be advertised.
+ */
+ if (!unchecked &&
+ !radv_instance_entrypoint_is_enabled(i, instance->apiVersion,
+ &instance->enabled_extensions)) {
+ instance->dispatch.entrypoints[i] = NULL;
+ } else {
+ instance->dispatch.entrypoints[i] =
+ radv_instance_dispatch_table.entrypoints[i];
+ }
+ }
+
+ for (unsigned i = 0; i < ARRAY_SIZE(instance->physical_device_dispatch.entrypoints); i++) {
+ /* Vulkan requires that entrypoints for extensions which have
+ * not been enabled must not be advertised.
+ */
+ if (!unchecked &&
+ !radv_physical_device_entrypoint_is_enabled(i, instance->apiVersion,
+ &instance->enabled_extensions)) {
+ instance->physical_device_dispatch.entrypoints[i] = NULL;
+ } else {
+ instance->physical_device_dispatch.entrypoints[i] =
+ radv_physical_device_dispatch_table.entrypoints[i];
+ }
+ }
+
+ for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) {
+ /* Vulkan requires that entrypoints for extensions which have
+ * not been enabled must not be advertised.
+ */
+ if (!unchecked &&
+ !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
+ &instance->enabled_extensions, NULL)) {
+ instance->device_dispatch.entrypoints[i] = NULL;
+ } else {
+ instance->device_dispatch.entrypoints[i] =
+ radv_device_dispatch_table.entrypoints[i];
+ }
+ }
+
result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
if (result != VK_SUCCESS) {
vk_free2(&default_alloc, pAllocator, instance);
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures*)ext;
- bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->storageBuffer16BitAccess = enabled;
- features->uniformAndStorageBuffer16BitAccess = enabled;
- features->storagePushConstant16 = enabled;
- features->storageInputOutput16 = enabled && LLVM_VERSION_MAJOR >= 9;
+ features->storageBuffer16BitAccess = !pdevice->use_aco;
+ features->uniformAndStorageBuffer16BitAccess = !pdevice->use_aco;
+ features->storagePushConstant16 = !pdevice->use_aco;
+ features->storageInputOutput16 = pdevice->rad_info.has_double_rate_fp16 && !pdevice->use_aco && LLVM_VERSION_MAJOR >= 9;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
VkPhysicalDevice8BitStorageFeatures *features =
(VkPhysicalDevice8BitStorageFeatures *)ext;
- bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->storageBuffer8BitAccess = enabled;
- features->uniformAndStorageBuffer8BitAccess = enabled;
- features->storagePushConstant8 = enabled;
+ features->storageBuffer8BitAccess = !pdevice->use_aco;
+ features->uniformAndStorageBuffer8BitAccess = !pdevice->use_aco;
+ features->storagePushConstant8 = !pdevice->use_aco;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
VkPhysicalDeviceShaderFloat16Int8Features *features =
(VkPhysicalDeviceShaderFloat16Int8Features*)ext;
- features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+ features->shaderFloat16 = pdevice->rad_info.has_double_rate_fp16 && !pdevice->use_aco;
features->shaderInt8 = !pdevice->use_aco;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
(VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
- features->shaderSubgroupExtendedTypes = true;
+ features->shaderSubgroupExtendedTypes = !pdevice->use_aco;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
VkPhysicalDeviceVulkan11Features *features =
(VkPhysicalDeviceVulkan11Features *)ext;
- features->storageBuffer16BitAccess = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->uniformAndStorageBuffer16BitAccess = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->storagePushConstant16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->storageInputOutput16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco && LLVM_VERSION_MAJOR >= 9;
+ features->storageBuffer16BitAccess = !pdevice->use_aco;
+ features->uniformAndStorageBuffer16BitAccess = !pdevice->use_aco;
+ features->storagePushConstant16 = !pdevice->use_aco;
+ features->storageInputOutput16 = pdevice->rad_info.has_double_rate_fp16 && !pdevice->use_aco && LLVM_VERSION_MAJOR >= 9;
features->multiview = true;
features->multiviewGeometryShader = true;
features->multiviewTessellationShader = true;
(VkPhysicalDeviceVulkan12Features *)ext;
features->samplerMirrorClampToEdge = true;
features->drawIndirectCount = true;
- features->storageBuffer8BitAccess = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->uniformAndStorageBuffer8BitAccess = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
- features->storagePushConstant8 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+ features->storageBuffer8BitAccess = !pdevice->use_aco;
+ features->uniformAndStorageBuffer8BitAccess = !pdevice->use_aco;
+ features->storagePushConstant8 = !pdevice->use_aco;
features->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9;
features->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9;
- features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+ features->shaderFloat16 = pdevice->rad_info.has_double_rate_fp16 && !pdevice->use_aco;
features->shaderInt8 = !pdevice->use_aco;
features->descriptorIndexing = true;
features->shaderInputAttachmentArrayDynamicIndexing = true;
features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
features->imagelessFramebuffer = true;
features->uniformBufferStandardLayout = true;
- features->shaderSubgroupExtendedTypes = true;
+ features->shaderSubgroupExtendedTypes = !pdevice->use_aco;
features->separateDepthStencilLayouts = true;
features->hostQueryReset = true;
features->timelineSemaphore = pdevice->rad_info.has_syncobj_wait_for_submit;
.maxMemoryAllocationCount = UINT32_MAX,
.maxSamplerAllocationCount = 64 * 1024,
.bufferImageGranularity = 64, /* A cache line */
- .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
+ .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
.maxBoundDescriptorSets = MAX_SETS,
.maxPerStageDescriptorSamplers = max_descriptor_set_size,
.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT;
- if (pdevice->rad_info.chip_class == GFX8 ||
- pdevice->rad_info.chip_class == GFX9) {
+ if (((pdevice->rad_info.chip_class == GFX6 ||
+ pdevice->rad_info.chip_class == GFX7) && !pdevice->use_aco) ||
+ pdevice->rad_info.chip_class >= GFX8) {
p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
}
/* On AMD hardware, denormals and rounding modes for fp16/fp64 are
* controlled by the same config register.
*/
- p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
- p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ if (pdevice->rad_info.has_double_rate_fp16) {
+ p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ } else {
+ p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
+ p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
+ }
/* Do not allow both preserving and flushing denorms because different
* shaders in the same pipeline can have different settings and this
p->shaderSignedZeroInfNanPreserveFloat32 = true;
p->shaderDenormFlushToZeroFloat16 = false;
- p->shaderDenormPreserveFloat16 = pdevice->rad_info.chip_class >= GFX8;
- p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.chip_class >= GFX8;
+ p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_double_rate_fp16;
+ p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_double_rate_fp16;
p->shaderRoundingModeRTZFloat16 = false;
- p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.chip_class >= GFX8;
+ p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_double_rate_fp16;
p->shaderDenormFlushToZeroFloat64 = false;
p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
properties->vgprsPerSimd =
pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
properties->minVgprAllocation =
- pdevice->rad_info.min_vgpr_alloc;
+ pdevice->rad_info.min_wave64_vgpr_alloc;
properties->maxVgprAllocation =
pdevice->rad_info.max_vgpr_alloc;
properties->vgprAllocationGranularity =
- pdevice->rad_info.vgpr_alloc_granularity;
+ pdevice->rad_info.wave64_vgpr_alloc_granularity;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
return VK_SUCCESS;
}
+/* Fill device->dispatch.entrypoints for this logical device.
+ * Entrypoints belonging to extensions that were not enabled are set to
+ * NULL (required by Vulkan), unless RADV_DEBUG_ALL_ENTRYPOINTS is set,
+ * in which case every entrypoint is exposed unchecked.  When thread
+ * tracing is requested via the RADV_THREAD_TRACE env var, entrypoints
+ * provided by the SQTT layer table take precedence over the plain
+ * radv table.
+ */
+static void
+radv_device_init_dispatch(struct radv_device *device)
+{
+	const struct radv_instance *instance = device->physical_device->instance;
+	const struct radv_device_dispatch_table *dispatch_table_layer = NULL;
+	bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
+	int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
+
+	if (radv_thread_trace >= 0) {
+		/* Use device entrypoints from the SQTT layer if enabled. */
+		dispatch_table_layer = &sqtt_device_dispatch_table;
+	}
+
+	for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) {
+		/* Vulkan requires that entrypoints for extensions which have not been
+		 * enabled must not be advertised.
+		 */
+		if (!unchecked &&
+		    !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
+						       &instance->enabled_extensions,
+						       &device->enabled_extensions)) {
+			device->dispatch.entrypoints[i] = NULL;
+		} else if (dispatch_table_layer &&
+			   dispatch_table_layer->entrypoints[i]) {
+			/* Layer entry wins when the SQTT layer provides one. */
+			device->dispatch.entrypoints[i] =
+				dispatch_table_layer->entrypoints[i];
+		} else {
+			device->dispatch.entrypoints[i] =
+				radv_device_dispatch_table.entrypoints[i];
+		}
+	}
+}
+
static VkResult
radv_create_pthread_cond(pthread_cond_t *cond)
{
device->enabled_extensions.extensions[index] = true;
}
+ radv_device_init_dispatch(device);
+
keep_shader_info = device->enabled_extensions.AMD_shader_info;
/* With update after bind we can't attach bo's to the command buffer
int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
if (radv_thread_trace >= 0) {
- fprintf(stderr, "*****************************************************************************\n");
- fprintf(stderr, "* WARNING: Thread trace support is experimental and only supported on GFX9! *\n");
- fprintf(stderr, "*****************************************************************************\n");
-
- /* TODO: add support for more ASICs. */
- assert(device->physical_device->rad_info.chip_class == GFX9);
+ fprintf(stderr, "*************************************************\n");
+ fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
+ fprintf(stderr, "*************************************************\n");
+
+ if (device->physical_device->rad_info.chip_class < GFX8) {
+ fprintf(stderr, "GPU hardware not supported: refer to "
+ "the RGP documentation for the list of "
+ "supported GPUs!\n");
+ abort();
+ }
/* Default buffer size set to 1MB per SE. */
device->thread_trace_buffer_size =
int ret;
result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
- 0, NULL, NULL);
+ 0, NULL, VK_NULL_HANDLE);
if (result != VK_SUCCESS)
return false;
const char* pName)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
- bool unchecked = instance ? instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS : false;
- if (unchecked) {
- return radv_lookup_entrypoint_unchecked(pName);
- } else {
- return radv_lookup_entrypoint_checked(pName,
- instance ? instance->apiVersion : 0,
- instance ? &instance->enabled_extensions : NULL,
- NULL);
- }
+ /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
+ * when we have to return valid function pointers, NULL, or it's left
+ * undefined. See the table for exact details.
+ */
+ if (pName == NULL)
+ return NULL;
+
+#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
+ if (strcmp(pName, "vk" #entrypoint) == 0) \
+ return (PFN_vkVoidFunction)radv_##entrypoint
+
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
+ LOOKUP_RADV_ENTRYPOINT(CreateInstance);
+
+#undef LOOKUP_RADV_ENTRYPOINT
+
+ if (instance == NULL)
+ return NULL;
+
+ int idx = radv_get_instance_entrypoint_index(pName);
+ if (idx >= 0)
+ return instance->dispatch.entrypoints[idx];
+
+ idx = radv_get_physical_device_entrypoint_index(pName);
+ if (idx >= 0)
+ return instance->physical_device_dispatch.entrypoints[idx];
+
+ idx = radv_get_device_entrypoint_index(pName);
+ if (idx >= 0)
+ return instance->device_dispatch.entrypoints[idx];
+
+ return NULL;
}
/* The loader wants us to expose a second GetInstanceProcAddr function
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
- return radv_lookup_physical_device_entrypoint_checked(pName,
- instance ? instance->apiVersion : 0,
- instance ? &instance->enabled_extensions : NULL);
+ if (!pName || !instance)
+ return NULL;
+
+ int idx = radv_get_physical_device_entrypoint_index(pName);
+ if (idx < 0)
+ return NULL;
+
+ return instance->physical_device_dispatch.entrypoints[idx];
}
PFN_vkVoidFunction radv_GetDeviceProcAddr(
const char* pName)
{
RADV_FROM_HANDLE(radv_device, device, _device);
- bool unchecked = device ? device->instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS : false;
- if (unchecked) {
- return radv_lookup_entrypoint_unchecked(pName);
- } else {
- return radv_lookup_entrypoint_checked(pName,
- device->instance->apiVersion,
- &device->instance->enabled_extensions,
- &device->enabled_extensions);
- }
+ if (!device || !pName)
+ return NULL;
+
+ int idx = radv_get_device_entrypoint_index(pName);
+ if (idx < 0)
+ return NULL;
+
+ return device->dispatch.entrypoints[idx];
}
bool radv_get_memory_fd(struct radv_device *device,
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_buffer *buffer;
+ if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
return 0;
}
+/* Convert a float to a signed fixed-point integer with 'frac_bits'
+ * fractional bits: the value is scaled by 2^frac_bits and truncated
+ * by the float-to-int conversion.
+ */
+static inline int S_FIXED(float value, unsigned frac_bits)
+{
+	return value * (1 << frac_bits);
+}
+
static void
radv_init_sampler(struct radv_device *device,
struct radv_sampler *sampler,