instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
} else if (!strcmp(engine_name, "Quantic Dream Engine")) {
/* Fix various artifacts in Detroit: Become Human */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM |
+ RADV_DEBUG_DISCARD_TO_DEMOTE;
}
}
features->extendedDynamicState = true;
break;
}
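+ /* AMD hardware bounds-checks image descriptor accesses, so
+ * out-of-bounds image accesses already return well-defined
+ * values and this can be exposed unconditionally. */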
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
+ VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
+ (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
+ features->robustImageAccess = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
+ VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
+ (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
+ features->shaderBufferFloat32Atomics = true;
+ features->shaderBufferFloat32AtomicAdd = false;
+ features->shaderBufferFloat64Atomics = true;
+ features->shaderBufferFloat64AtomicAdd = false;
+ features->shaderSharedFloat32Atomics = true;
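+ /* LDS float32 atomic adds need GFX8+ and, with LLVM, at least LLVM 10. */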
+ features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
+ (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
+ features->shaderSharedFloat64Atomics = true;
+ features->shaderSharedFloat64AtomicAdd = false;
+ features->shaderImageFloat32Atomics = true;
+ features->shaderImageFloat32AtomicAdd = false;
+ features->sparseImageFloat32Atomics = false;
+ features->sparseImageFloat32AtomicAdd = false;
+ break;
+ }
default:
break;
}
p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
}
- /* Do not allow both preserving and flushing denorms because different
- * shaders in the same pipeline can have different settings and this
- * won't work for merged shaders. To make it work, this requires LLVM
+ /* With LLVM, do not allow both preserving and flushing denorms because
+ * different shaders in the same pipeline can have different settings and
+ * this won't work for merged shaders. Making it work would require LLVM
* support for changing the register. The same logic applies for the
* rounding modes because they are configured with the same config
- * register. TODO: we can enable a lot of these for ACO when it
- * supports all stages.
+ * register.
*/
p->shaderDenormFlushToZeroFloat32 = true;
- p->shaderDenormPreserveFloat32 = false;
+ p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
p->shaderRoundingModeRTEFloat32 = true;
- p->shaderRoundingModeRTZFloat32 = false;
+ p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
p->shaderSignedZeroInfNanPreserveFloat32 = true;
- p->shaderDenormFlushToZeroFloat16 = false;
+ p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
- p->shaderRoundingModeRTZFloat16 = false;
+ p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
- p->shaderDenormFlushToZeroFloat64 = false;
+ p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
- p->shaderRoundingModeRTZFloat64 = false;
+ p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
device->empty_cs[family] = device->ws->cs_create(device->ws, family);
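+ /* cs_create returns NULL on allocation failure; bail out
+ * instead of crashing later. */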
+ if (!device->empty_cs[family])
+ goto fail;
+
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(device->empty_cs[family], 0);
break;
}
- device->ws->cs_finalize(device->empty_cs[family]);
+
+ result = device->ws->cs_finalize(device->empty_cs[family]);
+ if (result != VK_SUCCESS)
+ goto fail;
}
if (device->physical_device->rad_info.chip_class >= GFX7)
VkFence _fence,
bool is_signal)
{
- int syncobj_idx = 0, sem_idx = 0;
+ int syncobj_idx = 0, non_reset_idx = 0, sem_idx = 0;
if (num_sems == 0 && _fence == VK_NULL_HANDLE)
return VK_SUCCESS;
switch(sems[i]->kind) {
case RADV_SEMAPHORE_SYNCOBJ:
counts->syncobj_count++;
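+ /* Syncobjs from binary semaphores may need to be reset
+ * after a wait; count them separately. */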
+ counts->syncobj_reset_count++;
break;
case RADV_SEMAPHORE_WINSYS:
counts->sem_count++;
}
}
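+ /* Group the resettable syncobjs at the front of the array;
+ * timeline points and fence syncobjs go after them. */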
+ non_reset_idx = counts->syncobj_reset_count;
+
for (uint32_t i = 0; i < num_sems; i++) {
switch(sems[i]->kind) {
case RADV_SEMAPHORE_NONE:
pthread_mutex_unlock(&sems[i]->timeline.mutex);
if (point) {
- counts->syncobj[syncobj_idx++] = point->syncobj;
+ counts->syncobj[non_reset_idx++] = point->syncobj;
} else {
/* Explicitly remove the semaphore so we don't find a
* stale point later, post-submit. */
fence->temporary.kind != RADV_FENCE_NONE ?
&fence->temporary : &fence->permanent;
if (part->kind == RADV_FENCE_SYNCOBJ)
- counts->syncobj[syncobj_idx++] = part->syncobj;
+ counts->syncobj[non_reset_idx++] = part->syncobj;
}
- assert(syncobj_idx <= counts->syncobj_count);
- counts->syncobj_count = syncobj_idx;
+ assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
+ counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
return VK_SUCCESS;
}
}
}
-static void
+static VkResult
radv_sparse_buffer_bind_memory(struct radv_device *device,
const VkSparseBufferMemoryBindInfo *bind)
{
RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
+ VkResult result;
for (uint32_t i = 0; i < bind->bindCount; ++i) {
struct radv_device_memory *mem = NULL;
if (bind->pBinds[i].memory != VK_NULL_HANDLE)
mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- device->ws->buffer_virtual_bind(buffer->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
+ result = device->ws->buffer_virtual_bind(buffer->bo,
+ bind->pBinds[i].resourceOffset,
+ bind->pBinds[i].size,
+ mem ? mem->bo : NULL,
+ bind->pBinds[i].memoryOffset);
+ if (result != VK_SUCCESS)
+ return result;
}
+
+ return VK_SUCCESS;
}
-static void
+static VkResult
radv_sparse_image_opaque_bind_memory(struct radv_device *device,
const VkSparseImageOpaqueMemoryBindInfo *bind)
{
RADV_FROM_HANDLE(radv_image, image, bind->image);
+ VkResult result;
for (uint32_t i = 0; i < bind->bindCount; ++i) {
struct radv_device_memory *mem = NULL;
if (bind->pBinds[i].memory != VK_NULL_HANDLE)
mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- device->ws->buffer_virtual_bind(image->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
+ result = device->ws->buffer_virtual_bind(image->bo,
+ bind->pBinds[i].resourceOffset,
+ bind->pBinds[i].size,
+ mem ? mem->bo : NULL,
+ bind->pBinds[i].memoryOffset);
+ if (result != VK_SUCCESS)
+ return result;
}
+
+ return VK_SUCCESS;
}
static VkResult
uint32_t advance;
struct radv_winsys_sem_info sem_info;
VkResult result;
- int ret;
struct radeon_cmdbuf *initial_preamble_cs = NULL;
struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
struct radeon_cmdbuf *continue_preamble_cs = NULL;
goto fail;
for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
- radv_sparse_buffer_bind_memory(queue->device,
- submission->buffer_binds + i);
+ result = radv_sparse_buffer_bind_memory(queue->device,
+ submission->buffer_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
}
for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
- radv_sparse_image_opaque_bind_memory(queue->device,
- submission->image_opaque_binds + i);
+ result = radv_sparse_image_opaque_bind_memory(queue->device,
+ submission->image_opaque_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
}
if (!submission->cmd_buffer_count) {
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- &sem_info, NULL,
- false, base_fence);
- if (ret) {
- radv_loge("failed to submit CS\n");
- abort();
- }
-
- goto success;
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
+ &queue->device->empty_cs[queue->queue_family_index],
+ 1, NULL, NULL,
+ &sem_info, NULL,
+ false, base_fence);
+ if (result != VK_SUCCESS)
+ goto fail;
} else {
struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
(submission->cmd_buffer_count));
bo_list = &queue->device->bo_list.list;
}
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
- advance, initial_preamble, continue_preamble_cs,
- &sem_info, bo_list,
- can_patch, base_fence);
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
+ advance, initial_preamble, continue_preamble_cs,
+ &sem_info, bo_list,
+ can_patch, base_fence);
if (unlikely(queue->device->use_global_bo_list))
pthread_mutex_unlock(&queue->device->bo_list.mutex);
- if (ret) {
- radv_loge("failed to submit CS\n");
- abort();
- }
+ if (result != VK_SUCCESS)
+ goto fail;
+
if (queue->device->trace_bo) {
radv_check_gpu_hangs(queue, cs_array[j]);
}
free(cs_array);
}
-success:
radv_free_temp_syncobjs(queue->device,
submission->temporary_semaphore_part_count,
submission->temporary_semaphore_parts);
return VK_SUCCESS;
fail:
+ if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
+ /* If something goes wrong during the submission, such as an
+ * out-of-memory condition, it might be impossible to recover
+ * from the resulting inconsistent state. Assume the worst and
+ * return VK_ERROR_DEVICE_LOST to ensure clients do not attempt
+ * to submit the same job to this device again.
+ */
+ result = VK_ERROR_DEVICE_LOST;
+ }
+
radv_free_temp_syncobjs(queue->device,
submission->temporary_semaphore_part_count,
submission->temporary_semaphore_parts);
free(submission);
- return VK_ERROR_DEVICE_LOST;
+ return result;
}
static VkResult
struct radeon_winsys_ctx *ctx = queue->hw_ctx;
struct radv_winsys_sem_info sem_info;
VkResult result;
- int ret;
result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
0, NULL, VK_NULL_HANDLE);
if (result != VK_SUCCESS)
return false;
- ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL,
- NULL, &sem_info, NULL, false, NULL);
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
+ NULL, NULL, &sem_info, NULL,
+ false, NULL);
radv_free_sem_info(&sem_info);
- return !ret;
+ if (result != VK_SUCCESS)
+ return false;
+
+ return true;
}
/* Signals fence as soon as all the work currently put on queue is done. */
struct radv_timeline_point *ret = NULL;
struct radv_timeline_point *prev = NULL;
+ int r;
if (p <= timeline->highest_signaled)
return NULL;
if (list_is_empty(&timeline->free_points)) {
ret = malloc(sizeof(struct radv_timeline_point));
+ if (!ret)
+ return NULL;
+
- device->ws->create_syncobj(device->ws, false, &ret->syncobj);
+ r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
+ if (r) {
+ free(ret);
+ return NULL;
+ }
} else {
ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
list_del(&ret->list);