memcpy(uuid, &mesa_timestamp, 4);
memcpy((char*)uuid + 4, &llvm_timestamp, 4);
memcpy((char*)uuid + 8, &f, 2);
- snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
+ snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv%zd", sizeof(void *));
return 0;
}
goto fail;
}
+ if (instance->enabled_extensions.KHR_display) {
+ master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
+ if (master_fd >= 0) {
+ uint32_t accel_working = 0;
+ struct drm_amdgpu_info request = {
+ .return_pointer = (uintptr_t)&accel_working,
+ .return_size = sizeof(accel_working),
+ .query = AMDGPU_INFO_ACCEL_WORKING
+ };
+
+ if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
+ close(master_fd);
+ master_fd = -1;
+ }
+ }
+ }
+
device->master_fd = master_fd;
device->local_fd = fd;
device->ws->query_info(device->ws, &device->rad_info);
{"errors", RADV_DEBUG_ERRORS},
{"startup", RADV_DEBUG_STARTUP},
{"checkir", RADV_DEBUG_CHECKIR},
+ {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
{NULL, 0}
};
*/
instance->perftest_flags |= RADV_PERFTEST_SISCHED;
}
+ } else if (!strcmp(name, "DOOM_VFR")) {
+ /* Work around a Doom VFR game bug */
+ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
}
}
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
memset(pFeatures, 0, sizeof(*pFeatures));
*pFeatures = (VkPhysicalDeviceFeatures) {
.alphaToOne = true,
.multiViewport = true,
.samplerAnisotropy = true,
- .textureCompressionETC2 = false,
+ .textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 ||
+ pdevice->rad_info.family == CHIP_STONEY,
.textureCompressionASTC_LDR = false,
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2KHR *pFeatures)
{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
vk_foreach_struct(ext, pFeatures->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures*)ext;
- features->storageBuffer16BitAccess = false;
- features->uniformAndStorageBuffer16BitAccess = false;
- features->storagePushConstant16 = false;
- features->storageInputOutput16 = false;
+ bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI;
+ features->storageBuffer16BitAccess = enabled;
+ features->uniformAndStorageBuffer16BitAccess = enabled;
+ features->storagePushConstant16 = enabled;
+ features->storageInputOutput16 = enabled;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
features->runtimeDescriptorArray = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
+ VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
+ (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
+ features->conditionalRendering = true;
+ features->inheritedConditionalRendering = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
+ (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
+ features->vertexAttributeInstanceRateDivisor = VK_TRUE;
+ features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
+ break;
+ }
default:
break;
}
properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+ VkPhysicalDeviceProtectedMemoryProperties *properties =
+ (VkPhysicalDeviceProtectedMemoryProperties *)ext;
+ properties->protectedNoFault = false;
+ break;
+ }
default:
break;
}
}
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
- (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
+ ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) ||
+ device->physical_device->rad_info.family == CHIP_RAVEN);
/* Disabled and not implemented for now. */
- device->dfsm_allowed = device->pbb_allowed && false;
+ device->dfsm_allowed = device->pbb_allowed &&
+ device->physical_device->rad_info.family == CHIP_RAVEN;
#ifdef ANDROID
device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
if (!radv_init_trace(device))
goto fail;
+ fprintf(stderr, "*****************************************************************************\n");
+ fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
+ fprintf(stderr, "*****************************************************************************\n");
+
fprintf(stderr, "Trace file will be dumped to %s\n", filename);
radv_dump_enabled_options(device, stderr);
}
device->physical_device->rad_info.family != CHIP_CARRIZO &&
device->physical_device->rad_info.family != CHIP_STONEY;
unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
- unsigned max_offchip_buffers = max_offchip_buffers_per_se *
- device->physical_device->rad_info.max_se;
+ unsigned max_offchip_buffers;
unsigned offchip_granularity;
unsigned hs_offchip_param;
+
+ /*
+ * Per RadeonSI:
+ * This must be one less than the maximum number due to a hw limitation.
+ * Various hardware bugs in SI, CIK, and GFX9 need this.
+ *
+ * Per AMDVLK:
+ * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
+ * Gfx7 should limit max_offchip_buffers to 508
+ * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
+ *
+ * Follow AMDVLK here.
+ */
+ if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+ device->physical_device->rad_info.chip_class == CIK ||
+ device->physical_device->rad_info.chip_class == SI)
+ --max_offchip_buffers_per_se;
+
+ max_offchip_buffers = max_offchip_buffers_per_se *
+ device->physical_device->rad_info.max_se;
+
switch (device->tess_offchip_block_dw_size) {
default:
assert(0);
return;
if (esgs_ring_bo)
- radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
+ radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
if (gsvs_ring_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
+ radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
if (queue->device->physical_device->rad_info.chip_class >= CIK) {
radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
tf_va = radv_buffer_get_va(tess_rings_bo);
- radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8);
+ radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
if (queue->device->physical_device->rad_info.chip_class >= CIK) {
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
scratch_va = radv_buffer_get_va(compute_scratch_bo);
- radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
+ radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
radeon_emit(cs, scratch_va);
va = radv_buffer_get_va(descriptor_bo);
- radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
+ radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
}
}
+/**
+ * Emit the initial graphics configuration into a command stream.
+ *
+ * When the device has a gfx_init buffer, a PKT3_INDIRECT_BUFFER_CIK packet
+ * referencing that buffer's virtual address is emitted (size taken from
+ * gfx_init_size_dw, masked to 16 bits) and the buffer is added to the
+ * command stream's buffer list. Otherwise the state is emitted directly
+ * through si_emit_graphics().
+ *
+ * \param cs     command stream that receives the packets
+ * \param queue  queue whose device supplies the gfx_init buffer and winsys
+ */
+static void
+radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
+{
+	struct radv_device *dev = queue->device;
+	uint64_t init_va;
+
+	if (!dev->gfx_init) {
+		/* No init buffer available: write the state inline. */
+		si_emit_graphics(dev->physical_device, cs);
+		return;
+	}
+
+	init_va = radv_buffer_get_va(dev->gfx_init);
+
+	/* Header, low address dword, high address dword, size in dwords. */
+	radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+	radeon_emit(cs, init_va);
+	radeon_emit(cs, init_va >> 32);
+	radeon_emit(cs, dev->gfx_init_size_dw & 0xffff);
+
+	radv_cs_add_buffer(dev->ws, cs, dev->gfx_init);
+}
+
+/**
+ * Emit the initial compute configuration into a command stream by
+ * forwarding to si_emit_compute() with the queue's physical device.
+ *
+ * \param cs     command stream that receives the packets
+ * \param queue  queue whose device's physical device is passed through
+ */
+static void
+radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
+{
+	si_emit_compute(queue->device->physical_device, cs);
+}
+
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
uint32_t scratch_size,
dest_cs[i] = cs;
if (scratch_bo)
- radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
+ radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
+
+ /* Emit initial configuration. */
+ switch (queue->queue_family_index) {
+ case RADV_QUEUE_GENERAL:
+ radv_init_graphics_state(cs, queue);
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radv_init_compute_state(cs, queue);
+ break;
+ case RADV_QUEUE_TRANSFER:
+ break;
+ }
if (descriptor_bo != queue->descriptor_bo) {
uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
+ RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
queue->device->physical_device->rad_info.chip_class,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2);
+ RADV_CMD_FLAG_INV_GLOBAL_L2 |
+ RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
}
if (!queue->device->ws->cs_finalize(cs))
false, fence->fence);
radv_free_sem_info(&sem_info);
- /* TODO: find a better error */
if (ret)
- return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
return VK_SUCCESS;
}
if (fence) {
if (!fence_emitted) {
- radv_signal_fence(queue, fence);
+ result = radv_signal_fence(queue, fence);
+ if (result != VK_SUCCESS)
+ return result;
}
fence->submitted = true;
}
RADV_FROM_HANDLE(radv_queue, queue, _queue);
struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
bool fence_emitted = false;
+ VkResult result;
+ int ret;
for (uint32_t i = 0; i < bindInfoCount; ++i) {
struct radv_winsys_sem_info sem_info;
return result;
if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
- queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- &sem_info, NULL,
- false, base_fence);
+ ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
+ &queue->device->empty_cs[queue->queue_family_index],
+ 1, NULL, NULL,
+ &sem_info, NULL,
+ false, base_fence);
+ if (ret) {
+ radv_loge("failed to submit CS %d\n", i);
+ abort();
+ }
+
fence_emitted = true;
if (fence)
fence->submitted = true;
if (fence) {
if (!fence_emitted) {
- radv_signal_fence(queue, fence);
+ result = radv_signal_fence(queue, fence);
+ if (result != VK_SUCCESS)
+ return result;
}
fence->submitted = true;
}
if (!fence)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ fence->fence_wsi = NULL;
fence->submitted = false;
fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
fence->temp_syncobj = 0;
device->ws->destroy_syncobj(device->ws, fence->syncobj);
if (fence->fence)
device->ws->destroy_fence(fence->fence);
+ if (fence->fence_wsi)
+ fence->fence_wsi->destroy(fence->fence_wsi);
vk_free2(&device->alloc, pAllocator, fence);
}
{
for (uint32_t i = 0; i < fenceCount; ++i) {
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
- if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
+ if (fence->fence == NULL || fence->syncobj ||
+ fence->temp_syncobj ||
+ (!fence->signalled && !fence->submitted))
+ return false;
+ }
+ return true;
+}
+
+static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
+{
+ for (uint32_t i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+ if (fence->syncobj == 0 && fence->temp_syncobj == 0)
return false;
}
return true;
RADV_FROM_HANDLE(radv_device, device, _device);
timeout = radv_get_absolute_timeout(timeout);
- if (device->always_use_syncobj) {
+ if (device->always_use_syncobj &&
+ radv_all_fences_syncobj(fenceCount, pFences))
+ {
uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
if (!handles)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
if (fence->signalled)
continue;
- if (!fence->submitted) {
- while(radv_get_current_time() <= timeout && !fence->submitted)
- /* Do nothing */;
+ if (fence->fence) {
+ if (!fence->submitted) {
+ while(radv_get_current_time() <= timeout &&
+ !fence->submitted)
+ /* Do nothing */;
- if (!fence->submitted)
- return VK_TIMEOUT;
+ if (!fence->submitted)
+ return VK_TIMEOUT;
+
+ /* Recheck as it may have been set by
+ * submitting operations. */
+
+ if (fence->signalled)
+ continue;
+ }
- /* Recheck as it may have been set by submitting operations. */
- if (fence->signalled)
- continue;
+ expired = device->ws->fence_wait(device->ws,
+ fence->fence,
+ true, timeout);
+ if (!expired)
+ return VK_TIMEOUT;
}
- expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
- if (!expired)
- return VK_TIMEOUT;
+ if (fence->fence_wsi) {
+ VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
+ if (result != VK_SUCCESS)
+ return result;
+ }
fence->signalled = true;
}
return VK_SUCCESS;
if (!fence->submitted)
return VK_NOT_READY;
- if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
- return VK_NOT_READY;
+ if (fence->fence) {
+ if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
+ return VK_NOT_READY;
+ }
+ if (fence->fence_wsi) {
+ VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
+ if (result != VK_SUCCESS) {
+ if (result == VK_TIMEOUT)
+ return VK_NOT_READY;
+ return result;
+ }
+ }
return VK_SUCCESS;
}