X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_device.c;h=28a941e219c3c1b11a1c5e2d9d04271f8b54c3cd;hp=630f35ff7a8dbf4168e6d812f03824c2dc217c9c;hb=3f6a17a8fcdb891409794bb7925a6fb4947b54e1;hpb=b70829708ac0598d17804f471d512fdfb44f331f diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 630f35ff7a8..28a941e219c 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -29,7 +29,9 @@ #include #include #include +#include "radv_debug.h" #include "radv_private.h" +#include "radv_shader.h" #include "radv_cs.h" #include "util/disk_cache.h" #include "util/strtod.h" @@ -63,12 +65,15 @@ radv_device_get_cache_uuid(enum radeon_family family, void *uuid) } static void -radv_get_device_uuid(drmDevicePtr device, void *uuid) { - memset(uuid, 0, VK_UUID_SIZE); - memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2); - memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1); - memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1); - memcpy((char*)uuid + 4, &device->businfo.pci->func, 1); +radv_get_driver_uuid(void *uuid) +{ + ac_compute_driver_uuid(uuid, VK_UUID_SIZE); +} + +static void +radv_get_device_uuid(struct radeon_info *info, void *uuid) +{ + ac_compute_device_uuid(info, uuid, VK_UUID_SIZE); } static const VkExtensionProperties instance_extensions[] = { @@ -91,7 +96,7 @@ static const VkExtensionProperties instance_extensions[] = { #ifdef VK_USE_PLATFORM_WAYLAND_KHR { .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 5, + .specVersion = 6, }, #endif { @@ -102,6 +107,10 @@ static const VkExtensionProperties instance_extensions[] = { .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, .specVersion = 1, }, + { + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + .specVersion = 1, + }, }; static const VkExtensionProperties common_device_extensions[] = { @@ -137,10 +146,6 @@ static const VkExtensionProperties common_device_extensions[] = { .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, .specVersion = 1, }, - { - .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME, - .specVersion = 1, - }, { .extensionName = VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, .specVersion = 1, @@ -157,6 +162,44 @@ static const VkExtensionProperties common_device_extensions[] = { .extensionName = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, .specVersion = 1, }, + { + .extensionName = VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, + .specVersion = 1, + }, + { + .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, + .specVersion = 1, + }, + { + .extensionName = VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, + .specVersion = 1, + }, + { + .extensionName = VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, + .specVersion = 1, + }, +}; + +static const VkExtensionProperties rasterization_order_extension[] ={ + { + .extensionName = VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME, + .specVersion = 1, + }, +}; + +static const VkExtensionProperties ext_sema_device_extensions[] = { + { + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + .specVersion = 1, + }, + { + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, + .specVersion = 1, + }, + { + .extensionName = VK_KHX_MULTIVIEW_EXTENSION_NAME, + .specVersion = 1, + }, }; static VkResult @@ -293,7 +336,7 @@ radv_physical_device_init(struct radv_physical_device *device, goto fail; } - if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) { + if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) { radv_finish_wsi(device); device->ws->destroy(device->ws); result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, @@ -308,10 +351,29 @@ radv_physical_device_init(struct radv_physical_device *device, if (result != VK_SUCCESS) goto fail; + if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2) { + result = radv_extensions_register(instance, + &device->extensions, + rasterization_order_extension, + ARRAY_SIZE(rasterization_order_extension)); + if (result != VK_SUCCESS) + goto fail; + } + + if (device->rad_info.has_syncobj) { + result = radv_extensions_register(instance, + &device->extensions, + ext_sema_device_extensions, + ARRAY_SIZE(ext_sema_device_extensions)); + if (result != VK_SUCCESS) + goto fail; + } + fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); device->name = get_chip_name(device->rad_info.family); - radv_get_device_uuid(drm_device, device->device_uuid); + radv_get_driver_uuid(&device->device_uuid); + radv_get_device_uuid(&device->rad_info, &device->device_uuid); if (device->rad_info.family == CHIP_STONEY || device->rad_info.chip_class >= GFX9) { @@ -373,15 +435,33 @@ static const struct debug_control radv_debug_options[] = { {"unsafemath", RADV_DEBUG_UNSAFE_MATH}, {"allbos", RADV_DEBUG_ALL_BOS}, {"noibs", RADV_DEBUG_NO_IBS}, + {"spirv", RADV_DEBUG_DUMP_SPIRV}, + {"vmfaults", RADV_DEBUG_VM_FAULTS}, + {"zerovram", RADV_DEBUG_ZERO_VRAM}, + {"syncshaders", RADV_DEBUG_SYNC_SHADERS}, {NULL, 0} }; +const char * +radv_get_debug_option_name(int id) +{ + assert(id < ARRAY_SIZE(radv_debug_options) - 1); + return radv_debug_options[id].string; +} + static const struct debug_control radv_perftest_options[] = { - {"batchchain", RADV_PERFTEST_BATCHCHAIN}, + {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN}, {"sisched", RADV_PERFTEST_SISCHED}, {NULL, 0} }; +const char * +radv_get_perftest_option_name(int id) +{ + assert(id < ARRAY_SIZE(radv_debug_options) - 1); + return radv_perftest_options[id].string; +} + VkResult radv_CreateInstance( const VkInstanceCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, @@ -484,7 +564,7 @@ radv_enumerate_devices(struct radv_instance *instance) for (unsigned i = 0; i < (unsigned)max_devices; i++) { if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && devices[i]->bustype == DRM_BUS_PCI && - devices[i]->deviceinfo.pci->vendor_id == 0x1002) { + devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) { result = radv_physical_device_init(instance->physicalDevices + instance->physicalDeviceCount, @@ -543,7 +623,7 @@ void radv_GetPhysicalDeviceFeatures( .independentBlend = true, .geometryShader = !is_gfx9, .tessellationShader = !is_gfx9, - .sampleRateShading = false, + .sampleRateShading = true, .dualSrcBlend = true, .logicOp = true, .multiDrawIndirect = true, @@ -589,6 +669,25 @@ void radv_GetPhysicalDeviceFeatures2KHR( VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR *pFeatures) { + vk_foreach_struct(ext, pFeatures->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: { + VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext; + features->variablePointersStorageBuffer = true; + features->variablePointers = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: { + VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext; + features->multiview = true; + features->multiviewGeometryShader = true; + features->multiviewTessellationShader = true; + break; + } + default: + break; + } + } return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); } @@ -728,7 +827,7 @@ void radv_GetPhysicalDeviceProperties( *pProperties = (VkPhysicalDeviceProperties) { .apiVersion = VK_MAKE_VERSION(1, 0, 42), .driverVersion = vk_get_driver_version(), - .vendorID = 0x1002, + .vendorID = ATI_VENDOR_ID, .deviceID = pdevice->rad_info.pci_id, .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, .limits = limits, @@ -736,7 +835,7 @@ void radv_GetPhysicalDeviceProperties( }; strcpy(pProperties->deviceName, pdevice->name); - memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE); + memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE); } void radv_GetPhysicalDeviceProperties2KHR( @@ -756,11 +855,17 @@ void radv_GetPhysicalDeviceProperties2KHR( } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: { VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext; - radv_device_get_cache_uuid(0, properties->driverUUID); + memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); properties->deviceLUIDValid = false; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: { + VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext; + properties->maxMultiviewViewCount = MAX_VIEWS; + properties->maxMultiviewInstanceIndex = INT_MAX; + break; + } default: break; } @@ -889,15 +994,17 @@ void radv_GetPhysicalDeviceMemoryProperties( }; STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); + uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size, + physical_device->rad_info.vram_vis_size); pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT; pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) { .size = physical_device->rad_info.vram_size - - physical_device->rad_info.vram_vis_size, + visible_vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) { - .size = physical_device->rad_info.vram_vis_size, + .size = visible_vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) { @@ -936,6 +1043,8 @@ radv_queue_finish(struct radv_queue *queue) if (queue->hw_ctx) queue->device->ws->ctx_destroy(queue->hw_ctx); + if (queue->initial_full_flush_preamble_cs) + queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs); if (queue->initial_preamble_cs) queue->device->ws->cs_destroy(queue->initial_preamble_cs); if (queue->continue_preamble_cs) @@ -1006,6 +1115,19 @@ VkResult radv_CreateDevice( return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } + /* Check enabled features */ + if (pCreateInfo->pEnabledFeatures) { + VkPhysicalDeviceFeatures supported_features; + radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); + VkBool32 *supported_feature = (VkBool32 *)&supported_features; + VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; + unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); + } + } + device = vk_alloc2(&physical_device->instance->alloc, pAllocator, sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -1026,6 +1148,9 @@ VkResult radv_CreateDevice( else device->alloc = physical_device->instance->alloc; + mtx_init(&device->shader_slab_mutex, mtx_plain); + list_inithead(&device->shader_slabs); + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; uint32_t qfi = queue_create->queueFamilyIndex; @@ -1078,6 +1203,11 @@ VkResult radv_CreateDevice( device->physical_device->rad_info.chip_class >= VI && device->physical_device->rad_info.max_se >= 2; + if (getenv("RADV_TRACE_FILE")) { + if (!radv_init_trace(device)) + goto fail; + } + result = radv_device_init_meta(device); if (result != VK_SUCCESS) goto fail; @@ -1098,52 +1228,6 @@ VkResult radv_CreateDevice( break; } device->ws->cs_finalize(device->empty_cs[family]); - - device->flush_cs[family] = device->ws->cs_create(device->ws, family); - switch (family) { - case RADV_QUEUE_GENERAL: - case RADV_QUEUE_COMPUTE: - si_cs_emit_cache_flush(device->flush_cs[family], - false, - device->physical_device->rad_info.chip_class, - NULL, 0, - family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK, - RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); - break; - } - device->ws->cs_finalize(device->flush_cs[family]); - - device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family); - switch (family) { - case RADV_QUEUE_GENERAL: - case RADV_QUEUE_COMPUTE: - si_cs_emit_cache_flush(device->flush_shader_cs[family], - false, - device->physical_device->rad_info.chip_class, - NULL, 0, - family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK, - family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) | - RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2); - break; - } - device->ws->cs_finalize(device->flush_shader_cs[family]); - } - - if (getenv("RADV_TRACE_FILE")) { - device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8, - RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); - if (!device->trace_bo) - goto fail; - - device->trace_id_ptr = device->ws->buffer_map(device->trace_bo); - if (!device->trace_id_ptr) - goto fail; } if (device->physical_device->rad_info.chip_class >= CIK) @@ -1206,16 +1290,14 @@ void radv_DestroyDevice( vk_free(&device->alloc, device->queues[i]); if (device->empty_cs[i]) device->ws->cs_destroy(device->empty_cs[i]); - if (device->flush_cs[i]) - device->ws->cs_destroy(device->flush_cs[i]); - if (device->flush_shader_cs[i]) - device->ws->cs_destroy(device->flush_shader_cs[i]); } radv_device_finish_meta(device); VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); + radv_destroy_shader_slabs(device); + vk_free(&device->alloc, device); } @@ -1298,21 +1380,6 @@ void radv_GetDeviceQueue( *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]); } -static void radv_dump_trace(struct radv_device *device, - struct radeon_winsys_cs *cs) -{ - const char *filename = getenv("RADV_TRACE_FILE"); - FILE *f = fopen(filename, "w"); - if (!f) { - fprintf(stderr, "Failed to write trace dump to %s\n", filename); - return; - } - - fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); - device->ws->cs_dump(cs, f, *device->trace_id_ptr); - fclose(f); -} - static void fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, @@ -1331,13 +1398,13 @@ fill_geom_tess_rings(struct radv_queue *queue, uint32_t *desc = &map[4]; if (esgs_ring_bo) - esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo); + esgs_va = radv_buffer_get_va(esgs_ring_bo); if (gsvs_ring_bo) - gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo); + gsvs_va = radv_buffer_get_va(gsvs_ring_bo); if (tess_factor_ring_bo) - tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo); + tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo); if (tess_offchip_ring_bo) - tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo); + tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo); /* stride 0, num records - size, add tid, swizzle, elsize4, index stride 64 */ @@ -1515,6 +1582,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_sample_positions, + struct radeon_winsys_cs **initial_full_flush_preamble_cs, struct radeon_winsys_cs **initial_preamble_cs, struct radeon_winsys_cs **continue_preamble_cs) { @@ -1525,7 +1593,7 @@ radv_get_preamble_cs(struct radv_queue *queue, struct radeon_winsys_bo *gsvs_ring_bo = NULL; struct radeon_winsys_bo *tess_factor_ring_bo = NULL; struct radeon_winsys_bo *tess_offchip_ring_bo = NULL; - struct radeon_winsys_cs *dest_cs[2] = {0}; + struct radeon_winsys_cs *dest_cs[3] = {0}; bool add_tess_rings = false, add_sample_positions = false; unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0; unsigned max_offchip_buffers; @@ -1550,6 +1618,7 @@ radv_get_preamble_cs(struct radv_queue *queue, gsvs_ring_size <= queue->gsvs_ring_size && !add_tess_rings && !add_sample_positions && queue->initial_preamble_cs) { + *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs; *initial_preamble_cs = queue->initial_preamble_cs; *continue_preamble_cs = queue->continue_preamble_cs; if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) @@ -1651,7 +1720,7 @@ radv_get_preamble_cs(struct radv_queue *queue, } else descriptor_bo = queue->descriptor_bo; - for(int i = 0; i < 2; ++i) { + for(int i = 0; i < 3; ++i) { struct radeon_winsys_cs *cs = NULL; cs = queue->device->ws->cs_create(queue->device->ws, queue->queue_family_index ? RING_COMPUTE : RING_GFX); @@ -1682,7 +1751,7 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); if (scratch_bo) { - uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo); + uint64_t scratch_va = radv_buffer_get_va(scratch_bo); uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1); map[0] = scratch_va; @@ -1720,7 +1789,7 @@ radv_get_preamble_cs(struct radv_queue *queue, } if (tess_factor_ring_bo) { - uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo); + uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo); if (queue->device->physical_device->rad_info.chip_class >= CIK) { radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tess_factor_ring_size / 4)); @@ -1749,7 +1818,7 @@ radv_get_preamble_cs(struct radv_queue *queue, R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0}; - uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo); + uint64_t va = radv_buffer_get_va(descriptor_bo); for (int i = 0; i < ARRAY_SIZE(regs); ++i) { radeon_set_sh_reg_seq(cs, regs[i], 2); @@ -1759,7 +1828,7 @@ radv_get_preamble_cs(struct radv_queue *queue, } if (compute_scratch_bo) { - uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo); + uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo); uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1); @@ -1770,7 +1839,19 @@ radv_get_preamble_cs(struct radv_queue *queue, radeon_emit(cs, rsrc1); } - if (!i) { + if (i == 0) { + si_cs_emit_cache_flush(cs, + false, + queue->device->physical_device->rad_info.chip_class, + NULL, 0, + queue->queue_family_index == RING_COMPUTE && + queue->device->physical_device->rad_info.chip_class >= CIK, + (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | + RADV_CMD_FLAG_INV_ICACHE | + RADV_CMD_FLAG_INV_SMEM_L1 | + RADV_CMD_FLAG_INV_VMEM_L1 | + RADV_CMD_FLAG_INV_GLOBAL_L2); + } else if (i == 1) { si_cs_emit_cache_flush(cs, false, queue->device->physical_device->rad_info.chip_class, @@ -1787,14 +1868,18 @@ radv_get_preamble_cs(struct radv_queue *queue, goto fail; } + if (queue->initial_full_flush_preamble_cs) + queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs); + if (queue->initial_preamble_cs) queue->device->ws->cs_destroy(queue->initial_preamble_cs); if (queue->continue_preamble_cs) queue->device->ws->cs_destroy(queue->continue_preamble_cs); - queue->initial_preamble_cs = dest_cs[0]; - queue->continue_preamble_cs = dest_cs[1]; + queue->initial_full_flush_preamble_cs = dest_cs[0]; + queue->initial_preamble_cs = dest_cs[1]; + queue->continue_preamble_cs = dest_cs[2]; if (scratch_bo != queue->scratch_bo) { if (queue->scratch_bo) @@ -1843,6 +1928,7 @@ radv_get_preamble_cs(struct radv_queue *queue, if (add_sample_positions) queue->has_sample_positions = true; + *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs; *initial_preamble_cs = queue->initial_preamble_cs; *continue_preamble_cs = queue->continue_preamble_cs; if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) @@ -1869,6 +1955,89 @@ fail: return VK_ERROR_OUT_OF_DEVICE_MEMORY; } +static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts, + int num_sems, + const VkSemaphore *sems, + bool reset_temp) +{ + int syncobj_idx = 0, sem_idx = 0; + + if (num_sems == 0) + return VK_SUCCESS; + for (uint32_t i = 0; i < num_sems; i++) { + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); + + if (sem->temp_syncobj || sem->syncobj) + counts->syncobj_count++; + else + counts->sem_count++; + } + + if (counts->syncobj_count) { + counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count); + if (!counts->syncobj) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + if (counts->sem_count) { + counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count); + if (!counts->sem) { + free(counts->syncobj); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + + for (uint32_t i = 0; i < num_sems; i++) { + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]); + + if (sem->temp_syncobj) { + counts->syncobj[syncobj_idx++] = sem->temp_syncobj; + if (reset_temp) { + /* after we wait on a temp import - drop it */ + sem->temp_syncobj = 0; + } + } + else if (sem->syncobj) + counts->syncobj[syncobj_idx++] = sem->syncobj; + else { + assert(sem->sem); + counts->sem[sem_idx++] = sem->sem; + } + } + + return VK_SUCCESS; +} + +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info) +{ + free(sem_info->wait.syncobj); + free(sem_info->wait.sem); + free(sem_info->signal.syncobj); + free(sem_info->signal.sem); +} + +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info, + int num_wait_sems, + const VkSemaphore *wait_sems, + int num_signal_sems, + const VkSemaphore *signal_sems) +{ + VkResult ret; + memset(sem_info, 0, sizeof(*sem_info)); + + ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true); + if (ret) + return ret; + ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false); + if (ret) + radv_free_sem_info(sem_info); + + /* caller can override these */ + sem_info->cs_emit_wait = true; + sem_info->cs_emit_signal = true; + return ret; +} + VkResult radv_QueueSubmit( VkQueue _queue, uint32_t submitCount, @@ -1884,7 +2053,7 @@ VkResult radv_QueueSubmit( uint32_t scratch_size = 0; uint32_t compute_scratch_size = 0; uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; - struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL; + struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL; VkResult result; bool fence_emitted = false; bool tess_rings_needed = false; @@ -1909,7 +2078,7 @@ VkResult radv_QueueSubmit( result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, tess_rings_needed, - sample_positions_needed, + sample_positions_needed, &initial_flush_preamble_cs, &initial_preamble_cs, &continue_preamble_cs); if (result != VK_SUCCESS) return result; @@ -1917,18 +2086,24 @@ VkResult radv_QueueSubmit( for (uint32_t i = 0; i < submitCount; i++) { struct radeon_winsys_cs **cs_array; bool do_flush = !i || pSubmits[i].pWaitDstStageMask; - bool can_patch = !do_flush; + bool can_patch = true; uint32_t advance; + struct radv_winsys_sem_info sem_info; + + result = radv_alloc_sem_info(&sem_info, + pSubmits[i].waitSemaphoreCount, + pSubmits[i].pWaitSemaphores, + pSubmits[i].signalSemaphoreCount, + pSubmits[i].pSignalSemaphores); + if (result != VK_SUCCESS) + return result; if (!pSubmits[i].commandBufferCount) { if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], 1, NULL, NULL, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - pSubmits[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - pSubmits[i].signalSemaphoreCount, + &sem_info, false, base_fence); if (ret) { radv_loge("failed to submit CS %d\n", i); @@ -1936,42 +2111,37 @@ VkResult radv_QueueSubmit( } fence_emitted = true; } + radv_free_sem_info(&sem_info); continue; } cs_array = malloc(sizeof(struct radeon_winsys_cs *) * - (pSubmits[i].commandBufferCount + do_flush)); - - if(do_flush) - cs_array[0] = pSubmits[i].waitSemaphoreCount ? - queue->device->flush_shader_cs[queue->queue_family_index] : - queue->device->flush_cs[queue->queue_family_index]; + (pSubmits[i].commandBufferCount)); for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[i].pCommandBuffers[j]); assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - cs_array[j + do_flush] = cmd_buffer->cs; + cs_array[j] = cmd_buffer->cs; if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) can_patch = false; } - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { + struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs; advance = MIN2(max_cs_submission, - pSubmits[i].commandBufferCount + do_flush - j); - bool b = j == 0; - bool e = j + advance == pSubmits[i].commandBufferCount + do_flush; + pSubmits[i].commandBufferCount - j); if (queue->device->trace_bo) *queue->device->trace_id_ptr = 0; + sem_info.cs_emit_wait = j == 0; + sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount; + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, - advance, initial_preamble_cs, continue_preamble_cs, - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, - b ? pSubmits[i].waitSemaphoreCount : 0, - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, - e ? pSubmits[i].signalSemaphoreCount : 0, + advance, initial_preamble, continue_preamble_cs, + &sem_info, can_patch, base_fence); if (ret) { @@ -1980,28 +2150,22 @@ VkResult radv_QueueSubmit( } fence_emitted = true; if (queue->device->trace_bo) { - bool success = queue->device->ws->ctx_wait_idle( - queue->hw_ctx, - radv_queue_family_to_ring( - queue->queue_family_index), - queue->queue_idx); - - if (!success) { /* Hang */ - radv_dump_trace(queue->device, cs_array[j]); - abort(); - } + radv_check_gpu_hangs(queue, cs_array[j]); } } + + radv_free_sem_info(&sem_info); free(cs_array); } if (fence) { - if (!fence_emitted) + if (!fence_emitted) { + struct radv_winsys_sem_info sem_info = {0}; ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, NULL, 0, NULL, 0, + 1, NULL, NULL, &sem_info, false, base_fence); - + } fence->submitted = true; } @@ -2123,8 +2287,10 @@ VkResult radv_AllocateMemory( if (!mem->bo) { result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; goto fail; - } else + } else { + close(import_info->fd); goto out_success; + } } uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); @@ -2336,44 +2502,74 @@ void radv_GetDeviceMemoryCommitment( *pCommittedMemoryInBytes = 0; } +VkResult radv_BindBufferMemory2KHR(VkDevice device, + uint32_t bindInfoCount, + const VkBindBufferMemoryInfoKHR *pBindInfos) +{ + for (uint32_t i = 0; i < bindInfoCount; ++i) { + RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); + RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer); + + if (mem) { + buffer->bo = mem->bo; + buffer->offset = pBindInfos[i].memoryOffset; + } else { + buffer->bo = NULL; + } + } + return VK_SUCCESS; +} + VkResult radv_BindBufferMemory( VkDevice device, - VkBuffer _buffer, - VkDeviceMemory _memory, + VkBuffer buffer, + VkDeviceMemory memory, VkDeviceSize memoryOffset) { - RADV_FROM_HANDLE(radv_device_memory, mem, _memory); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); + const VkBindBufferMemoryInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR, + .buffer = buffer, + .memory = memory, + .memoryOffset = memoryOffset + }; - if (mem) { - buffer->bo = mem->bo; - buffer->offset = memoryOffset; - } else { - buffer->bo = NULL; - buffer->offset = 0; - } + return radv_BindBufferMemory2KHR(device, 1, &info); +} +VkResult radv_BindImageMemory2KHR(VkDevice device, + uint32_t bindInfoCount, + const VkBindImageMemoryInfoKHR *pBindInfos) +{ + for (uint32_t i = 0; i < bindInfoCount; ++i) { + RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); + RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image); + + if (mem) { + image->bo = mem->bo; + image->offset = pBindInfos[i].memoryOffset; + } else { + image->bo = NULL; + image->offset = 0; + } + } return VK_SUCCESS; } + VkResult radv_BindImageMemory( VkDevice device, - VkImage _image, - VkDeviceMemory _memory, + VkImage image, + VkDeviceMemory memory, VkDeviceSize memoryOffset) { - RADV_FROM_HANDLE(radv_device_memory, mem, _memory); - RADV_FROM_HANDLE(radv_image, image, _image); - - if (mem) { - image->bo = mem->bo; - image->offset = memoryOffset; - } else { - image->bo = NULL; - image->offset = 0; - } + const VkBindImageMemoryInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR, + .image = image, + .memory = memory, + .memoryOffset = memoryOffset + }; - return VK_SUCCESS; + return radv_BindImageMemory2KHR(device, 1, &info); } @@ -2429,6 +2625,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, bool fence_emitted = false; for (uint32_t i = 0; i < bindInfoCount; ++i) { + struct radv_winsys_sem_info sem_info; for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) { radv_sparse_buffer_bind_memory(queue->device, pBindInfo[i].pBufferBinds + j); @@ -2439,19 +2636,28 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device, pBindInfo[i].pImageOpaqueBinds + j); } + VkResult result; + result = radv_alloc_sem_info(&sem_info, + pBindInfo[i].waitSemaphoreCount, + pBindInfo[i].pWaitSemaphores, + pBindInfo[i].signalSemaphoreCount, + pBindInfo[i].pSignalSemaphores); + if (result != VK_SUCCESS) + return result; + if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) { queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx, &queue->device->empty_cs[queue->queue_family_index], 1, NULL, NULL, - (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores, - pBindInfo[i].waitSemaphoreCount, - (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores, - pBindInfo[i].signalSemaphoreCount, + &sem_info, false, base_fence); fence_emitted = true; if (fence) fence->submitted = true; } + + radv_free_sem_info(&sem_info); + } if (fence && !fence_emitted) { @@ -2588,13 +2794,38 @@ VkResult radv_CreateSemaphore( VkSemaphore* pSemaphore) { RADV_FROM_HANDLE(radv_device, device, _device); - struct radeon_winsys_sem *sem; + const VkExportSemaphoreCreateInfoKHR *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR); + VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = + export ? export->handleTypes : 0; - sem = device->ws->create_sem(device->ws); + struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator, + sizeof(*sem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!sem) return VK_ERROR_OUT_OF_HOST_MEMORY; - *pSemaphore = radeon_winsys_sem_to_handle(sem); + sem->temp_syncobj = 0; + /* create a syncobject if we are going to export this semaphore */ + if (handleTypes) { + assert (device->physical_device->rad_info.has_syncobj); + assert (handleTypes == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); + int ret = device->ws->create_syncobj(device->ws, &sem->syncobj); + if (ret) { + vk_free2(&device->alloc, pAllocator, sem); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + sem->sem = NULL; + } else { + sem->sem = device->ws->create_sem(device->ws); + if (!sem->sem) { + vk_free2(&device->alloc, pAllocator, sem); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + sem->syncobj = 0; + } + + *pSemaphore = radv_semaphore_to_handle(sem); return VK_SUCCESS; } @@ -2604,11 +2835,15 @@ void radv_DestroySemaphore( const VkAllocationCallbacks* pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore); + RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore); if (!_semaphore) return; - device->ws->destroy_sem(sem); + if (sem->syncobj) + device->ws->destroy_syncobj(device->ws, sem->syncobj); + else + device->ws->destroy_sem(sem->sem); + vk_free2(&device->alloc, pAllocator, sem); } VkResult radv_CreateEvent( @@ -2627,7 +2862,7 @@ VkResult radv_CreateEvent( event->bo = device->ws->buffer_create(device->ws, 8, 8, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS); + RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS); if (!event->bo) { vk_free2(&device->alloc, pAllocator, event); return VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -2771,7 +3006,9 @@ radv_initialise_color_surface(struct radv_device *device, /* Intensity is implemented as Red, so treat it that way. */ cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1); - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset; + + cb->cb_color_base = va >> 8; if (device->physical_device->rad_info.chip_class >= GFX9) { struct gfx9_surf_meta_flags meta; @@ -2785,12 +3022,15 @@ radv_initialise_color_surface(struct radv_device *device, S_028C74_RB_ALIGNED(meta.rb_aligned) | S_028C74_PIPE_ALIGNED(meta.pipe_aligned); - va += iview->image->surface.u.gfx9.surf_offset >> 8; + cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8; + cb->cb_color_base |= iview->image->surface.tile_swizzle; } else { const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip]; unsigned pitch_tile_max, slice_tile_max, tile_mode_index; - va += level_info->offset; + cb->cb_color_base += level_info->offset >> 8; + if (level_info->mode == RADEON_SURF_MODE_2D) + cb->cb_color_base |= iview->image->surface.tile_swizzle; pitch_tile_max = level_info->nblk_x / 8 - 1; slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1; @@ -2817,16 +3057,15 @@ radv_initialise_color_surface(struct radv_device *device, } } - cb->cb_color_base = va >> 8; - /* CMASK variables */ - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset; va += iview->image->cmask.offset; cb->cb_color_cmask = va >> 8; - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset; va += iview->image->dcc_offset; cb->cb_dcc_base = va >> 8; + cb->cb_dcc_base |= iview->image->surface.tile_swizzle; uint32_t max_slice = radv_surface_layer_count(iview); cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) | @@ -2840,8 +3079,9 @@ radv_initialise_color_surface(struct radv_device *device, } if (iview->image->fmask.size) { - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset; cb->cb_color_fmask = va >> 8; + cb->cb_color_fmask |= iview->image->fmask.tile_swizzle; } else { cb->cb_color_fmask = cb->cb_color_base; } @@ -2888,9 +3128,13 @@ radv_initialise_color_surface(struct radv_device *device, format != V_028C70_COLOR_24_8) | S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian); - if (iview->image->info.samples > 1) - if (iview->image->fmask.size) - cb->cb_color_info |= S_028C70_COMPRESSION(1); + if ((iview->image->info.samples > 1) && iview->image->fmask.size) { + cb->cb_color_info |= S_028C70_COMPRESSION(1); + if (device->physical_device->rad_info.chip_class == SI) { + unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height); + cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); + } + } if (iview->image->cmask.size && !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)) @@ -2920,15 +3164,15 @@ radv_initialise_color_surface(struct radv_device *device, } if (device->physical_device->rad_info.chip_class >= GFX9) { - uint32_t max_slice = radv_surface_layer_count(iview); - unsigned mip0_depth = iview->base_layer + max_slice - 1; + unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ? + (iview->extent.depth - 1) : (iview->image->info.array_size - 1); cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip); cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type); - cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) | - S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) | - S_028C68_MAX_MIP(iview->image->info.levels); + cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) | + S_028C68_MIP0_HEIGHT(iview->extent.height - 1) | + S_028C68_MAX_MIP(iview->image->info.levels - 1); cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch); @@ -2970,7 +3214,7 @@ radv_initialise_ds_surface(struct radv_device *device, } format = radv_translate_dbformat(iview->image->vk_format); - stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ? + stencil_format = iview->image->surface.has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; uint32_t max_slice = radv_surface_layer_count(iview); @@ -2980,7 +3224,7 @@ radv_initialise_ds_surface(struct radv_device *device, ds->db_htile_data_base = 0; ds->db_htile_surface = 0; - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset; s_offs = z_offs = va; if (device->physical_device->rad_info.chip_class >= GFX9) { @@ -3005,10 +3249,10 @@ radv_initialise_ds_surface(struct radv_device *device, if (iview->image->surface.htile_size && !level) { ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1); - if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER)) + if (!iview->image->surface.has_stencil) /* Use all of the htile_buffer for depth if there's no stencil. */ ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1); - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + + va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->htile_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | @@ -3057,6 +3301,8 @@ radv_initialise_ds_surface(struct radv_device *device, ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); tile_mode_index = si_tile_mode_index(iview->image, level, true); ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); + if (stencil_only) + ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); } ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) | @@ -3066,11 +3312,11 @@ radv_initialise_ds_surface(struct radv_device *device, if (iview->image->surface.htile_size && !level) { ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1); - if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER)) + if (!iview->image->surface.has_stencil) /* Use all of the htile_buffer for depth if there's no stencil. */ ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); - va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + + va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->htile_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1); @@ -3388,3 +3634,61 @@ VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device, */ return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; } + +VkResult radv_ImportSemaphoreFdKHR(VkDevice _device, + const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); + uint32_t syncobj_handle = 0; + assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); + + int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle); + if (ret != 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + + if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) { + sem->temp_syncobj = syncobj_handle; + } else { + sem->syncobj = syncobj_handle; + } + close(pImportSemaphoreFdInfo->fd); + return VK_SUCCESS; +} + +VkResult radv_GetSemaphoreFdKHR(VkDevice _device, + const VkSemaphoreGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore); + int ret; + uint32_t syncobj_handle; + + assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR); + if (sem->temp_syncobj) + syncobj_handle = sem->temp_syncobj; + else + syncobj_handle = sem->syncobj; + ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); + if (ret) + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return VK_SUCCESS; +} + +void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, + VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) +{ + if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; + } else { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; + pExternalSemaphoreProperties->compatibleHandleTypes = 0; + pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; + } +}